From c13392ab45104e644151501250593efb336c8ca5 Mon Sep 17 00:00:00 2001 From: liuzhe-lz <40699903+liuzhe-lz@users.noreply.github.com> Date: Wed, 23 Feb 2022 16:31:49 +0800 Subject: [PATCH 01/14] Update Dockerfile (#4576) --- Dockerfile | 88 ++++++++++++++++-------------------------------------- 1 file changed, 26 insertions(+), 62 deletions(-) diff --git a/Dockerfile b/Dockerfile index df45803ef2..3d0e693642 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -FROM nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04 +FROM nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 ARG NNI_RELEASE @@ -11,84 +11,48 @@ ENV DEBIAN_FRONTEND=noninteractive RUN apt-get -y update RUN apt-get -y install \ - sudo \ - apt-utils \ - git \ - curl \ - vim \ - unzip \ - wget \ + automake \ build-essential \ cmake \ - libopenblas-dev \ - automake \ - openssh-client \ + curl \ + git \ openssh-server \ - lsof \ - python3.6 \ + python3 \ python3-dev \ python3-pip \ - python3-tk \ - libcupti-dev + sudo \ + unzip \ + wget \ + zip RUN apt-get clean RUN rm -rf /var/lib/apt/lists/* -# -# generate python script -# RUN ln -s python3 /usr/bin/python -# -# update pip -# -RUN python3 -m pip install --upgrade pip==20.2.4 setuptools==50.3.2 +RUN python3 -m pip --no-cache-dir install pip==22.0.3 setuptools==60.9.1 wheel==0.37.1 -# numpy 1.19.5 scipy 1.5.4 -RUN python3 -m pip --no-cache-dir install numpy==1.19.5 scipy==1.5.4 +RUN python3 -m pip --no-cache-dir install \ + lightgbm==3.3.2 \ + numpy==1.22.2 \ + pandas==1.4.1 \ + scikit-learn==1.0.2 \ + scipy==1.8.0 -# -# TensorFlow -# -RUN python3 -m pip --no-cache-dir install tensorflow==2.3.1 +RUN python3 -m pip --no-cache-dir install \ + torch==1.10.2+cu113 \ + torchvision==0.11.3+cu113 \ + torchaudio==0.10.2+cu113 \ + -f https://download.pytorch.org/whl/cu113/torch_stable.html +RUN python3 -m pip --no-cache-dir install pytorch-lightning==1.5.10 -# -# Keras -# -RUN python3 -m pip --no-cache-dir install Keras==2.4.3 +RUN python3 -m pip --no-cache-dir install tensorflow==2.8.0 -# -# PyTorch -# -RUN python3 -m pip --no-cache-dir install torch==1.7.1 torchvision==0.8.2 pytorch-lightning==1.3.3 +RUN python3 -m pip --no-cache-dir install azureml==0.2.7 azureml-sdk==1.38.0 -# -# sklearn 0.24.1 -# -RUN python3 -m pip --no-cache-dir install scikit-learn==0.24.1 - -# -# pandas==0.23.4 lightgbm==2.2.2 -# -RUN python3 -m pip --no-cache-dir install pandas==1.1 lightgbm==2.2.2 - -# -# Install NNI -# COPY dist/nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl . RUN python3 -m pip install nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl +RUN rm nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl -# -# Vision patch. Need del later -# -COPY test/vso_tools/interim_patch.py . 
-RUN python3 interim_patch.py - -# -# install aml package -# -RUN python3 -m pip --no-cache-dir install azureml -RUN python3 -m pip --no-cache-dir install azureml-sdk - -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/root/.local/bin:/usr/bin:/bin:/sbin +ENV PATH=/root/.local/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/usr/sbin WORKDIR /root From 0502e2d584dea8e09762cdda66de817d3834d1f6 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Mon, 28 Feb 2022 11:01:38 +0800 Subject: [PATCH 02/14] Contribution guide (#4557) --- CONTRIBUTING.md | 68 ---- CONTRIBUTING_zh_CN.md | 62 ---- docs/source/Overview_zh.rst | 125 ------- docs/source/Tutorial/Contributing.rst | 74 ---- .../Tutorial/SetupNniDeveloperEnvironment.rst | 68 ---- docs/source/contribution.rst | 7 - docs/source/contribution_zh.rst | 9 - docs/source/index.rst | 3 +- docs/source/index_zh.rst | 5 +- .../architecture_overview.rst} | 80 ++--- docs/source/notes/contributing.rst | 329 ++++++++++++++++++ 11 files changed, 353 insertions(+), 477 deletions(-) delete mode 100644 CONTRIBUTING.md delete mode 100644 CONTRIBUTING_zh_CN.md delete mode 100644 docs/source/Overview_zh.rst delete mode 100644 docs/source/Tutorial/Contributing.rst delete mode 100644 docs/source/Tutorial/SetupNniDeveloperEnvironment.rst delete mode 100644 docs/source/contribution.rst delete mode 100644 docs/source/contribution_zh.rst rename docs/source/{Overview.rst => notes/architecture_overview.rst} (55%) create mode 100644 docs/source/notes/contributing.rst diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 6ce6186868..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,68 +0,0 @@ -# Contributing to NNI - -Welcome, and thank you for your interest in contributing to NNI! - -There are many ways in which you can contribute, beyond writing code. The goal of this document is to provide a high-level overview of how you can get involved. - -# Provide feedback or ask a question - -* [File an issue](https://github.com/microsoft/nni/issues/new/choose) on GitHub. -* Ask a question with NNI tags on [Stack Overflow](https://stackoverflow.com/questions/tagged/nni?sort=Newest&edited=true). -* Discuss on the NNI [Gitter](https://gitter.im/Microsoft/nni?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) in NNI. - -Join IM discussion groups: -|Gitter||WeChat| -|----|----|----| -|![image](https://user-images.githubusercontent.com/39592018/80665738-e0574a80-8acc-11ea-91bc-0836dc4cbf89.png)| OR |![image](https://github.com/scarlett2018/nniutil/raw/master/wechat.png)| - - -# Look for an existing issue -Before you create a new issue, please do a search in [open issues](https://github.com/microsoft/nni/issues) to see if the issue or feature request has already been filed. - -Be sure to scan through the [most popular](https://github.com/microsoft/nni/issues?q=is%3Aopen+is%3Aissue+label%3AFAQ+sort%3Areactions-%2B1-desc) feature requests. - -If you find your issue already exists, make relevant comments and add your [reaction](https://github.com/blog/2119-add-reactions-to-pull-requests-issues-and-comments). Use a reaction in place of a "+1" comment: - -* 👍 - upvote -* 👎 - downvote - -If you cannot find an existing issue that describes your bug or feature, create a new issue using the guidelines below. - -# Writing good bug reports or feature requests -File a single issue per problem and feature request. Do not enumerate multiple bugs or feature requests in the same issue. 
- -Provide as much information as you think might relevant to the context (thinking the issue is assigning to you, what kinds of info you will need to debug it!!!). To give you a general idea about what kinds of info are useful for developers to dig out the issue, we had provided issue template for you. - -Once you had submitted an issue, be sure to follow it for questions and discussions. - -Once the bug is fixed or feature is addressed, be sure to close the issue. - -# Contributing fixes or examples - -This project welcomes contributions and suggestions. Most contributions require you to agree to a -Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. - -When you submit a pull request, a CLA bot will automatically determine whether you need to provide -a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. - -# Code of Conduct - -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. - -# How to Contribute - -After getting familiar with contribution agreements, you are ready to create your first PR =), follow the NNI developer tutorials to get start: - -* We recommend new contributors to start with simple issues: ['good first issue'](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) or ['help-wanted'](https://github.com/microsoft/nni/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22). -* [NNI developer environment installation tutorial](docs/en_US/Tutorial/SetupNniDeveloperEnvironment.rst) -* [How to debug](docs/en_US/Tutorial/HowToDebug.rst) -* If you have any questions on usage, review [FAQ](https://github.com/microsoft/nni/blob/master/docs/en_US/Tutorial/FAQ.rst) first, if there are no relevant issues and answers to your question, try contact NNI dev team and users in [Gitter](https://gitter.im/Microsoft/nni?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) or [File an issue](https://github.com/microsoft/nni/issues/new/choose) on GitHub. -* [Customize your own Tuner](docs/en_US/Tuner/CustomizeTuner.rst) -* [Implement customized TrainingService](docs/en_US/TrainingService/HowToImplementTrainingService.rst) -* [Implement a new NAS trainer on NNI](docs/en_US/NAS/Advanced.rst) -* [Customize your own Advisor](docs/en_US/Tuner/CustomizeAdvisor.rst) - diff --git a/CONTRIBUTING_zh_CN.md b/CONTRIBUTING_zh_CN.md deleted file mode 100644 index 1626a8524e..0000000000 --- a/CONTRIBUTING_zh_CN.md +++ /dev/null @@ -1,62 +0,0 @@ -# 贡献代码 - -非常感谢您有兴趣对 NNI 做出贡献! 
- -除了编写代码外,您还可以通过多种方式参与, 本文档的目的是提供一个如何参与贡献的高层次概述。 - -# 反馈或提问 - -* 在 Github 上创建 [issue](https://github.com/microsoft/nni/issues/new/choose)。 -* 在 [Stack Overflow](https://stackoverflow.com/questions/tagged/nni?sort=Newest&edited=true) 上使用 nni 标签提问。 -* 在 [Gitter](https://gitter.im/Microsoft/nni?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 中参与讨论。 - -加入聊天组: -| Gitter | | 微信 | -| -------------------------------------------------------------------------------------------------------------- | - | ----------------------------------------------------------------------- | -| ![image](https://user-images.githubusercontent.com/39592018/80665738-e0574a80-8acc-11ea-91bc-0836dc4cbf89.png) | 或 | ![image](https://github.com/scarlett2018/nniutil/raw/master/wechat.png) | - - -# 查找现有问题 -在创建新 issue 之前,请在 [open issues](https://github.com/microsoft/nni/issues) 中进行搜索,以查看问题或功能请求是否已经存在。 - -确保已经浏览了 [最热门](https://github.com/microsoft/nni/issues?q=is%3Aopen+is%3Aissue+label%3AFAQ+sort%3Areactions-%2B1-desc) 的功能请求。 - -如果您的问题已经存在,请在下方发表评论或添加[回应](https://github.com/blog/2119-add-reactions-to-pull-requests-issues-and-comments)。 通过回应来代替“+1”评论: - -* 👍 - 赞成 -* 👎 - 反对 - -如果未能找到描述您 Bug 或功能的现有问题,请使用以下指南创建一个新问题。 - -# 编写良好的错误报告或功能请求 -针对每个错误和功能请求提交一个问题, 不要在同一问题中列举多个 Bug 或功能请求。 - -尽可能多地提供您认为与上下文相关的信息(思考问题如果分配给您,您需要什么样的信息来调试它)。 为了让您大致了解哪些信息对开发人员解决问题有帮助,我们为您提供了问题模板。 - -提交问题后,请务必跟进问题并参与讨论。 - -修正 Bug 或实现功能后,请务必关闭此问题。 - -# 贡献修复或示例 - -此项目欢迎任何贡献和建议。 大多数贡献需要您同意参与者许可协议(CLA),来声明您有权并授予我们使用您贡献的权利。 有关详细信息,请访问 https://cla.opensource.microsoft.com。 - -当你提交拉取请求时,CLA 机器人会自动检查你是否需要提供 CLA,并修饰这个拉取请求(例如标签、注释等)。 只需要按照机器人提供的说明进行操作即可。 CLA 只需要同意一次,就能应用到所有的代码仓库上。 - -# 行为准则 - -该项目采用了 [ Microsoft 开源行为准则 ](https://opensource.microsoft.com/codeofconduct/)。 有关详细信息,请参阅[行为守则常见问题解答](https://opensource.microsoft.com/codeofconduct/faq/)或联系 opencode@microsoft.com 咨询问题或评论。 - -# 参与贡献 - -熟悉贡献协议后,即可按照 NNI 开发人员教程,创建第一个 PR =): - -* 推荐新贡献者先从简单的问题开始:['good first issue'](https://github.com/Microsoft/nni/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) 或 ['help-wanted'](https://github.com/microsoft/nni/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22)。 -* [NNI 开发环境安装教程](docs/zh_CN/Tutorial/SetupNniDeveloperEnvironment.rst) -* [如何调试](docs/zh_CN/Tutorial/HowToDebug.rst) -* 如果有使用上的问题,可先查看[常见问题解答](https://github.com/microsoft/nni/blob/master/docs/zh_CN/Tutorial/FAQ.rst)。如果没能解决问题,可通过 [Gitter](https://gitter.im/Microsoft/nni?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 联系 NNI 开发团队或在 GitHub 上 [报告问题](https://github.com/microsoft/nni/issues/new/choose)。 -* [自定义 Tuner](docs/zh_CN/Tuner/CustomizeTuner.rst) -* [实现定制的训练平台](docs/zh_CN/TrainingService/HowToImplementTrainingService.rst) -* [在 NNI 上实现新的 NAS Trainer](docs/zh_CN/NAS/Advanced.rst) -* [自定义 Advisor](docs/zh_CN/Tuner/CustomizeAdvisor.rst) - diff --git a/docs/source/Overview_zh.rst b/docs/source/Overview_zh.rst deleted file mode 100644 index be61cc35c3..0000000000 --- a/docs/source/Overview_zh.rst +++ /dev/null @@ -1,125 +0,0 @@ -.. 
6e45ee0ddd5d0315e5c946149d4f9c31 - -概述 -======== - -NNI (Neural Network Intelligence) 是一个工具包,可有效的帮助用户设计并调优机器学习模型的神经网络架构,复杂系统的参数(如超参)等。 NNI 的特性包括:易于使用,可扩展,灵活,高效。 - - -* **易于使用**:NNI 可通过 pip 安装。 只需要在代码中添加几行,就可以利用 NNI 来调优参数。 可使用命令行工具或 Web 界面来查看 Experiment。 -* **可扩展**:调优超参或网络结构通常需要大量的计算资源。NNI 在设计时就支持了多种不同的计算资源,如远程服务器组,训练平台(如:OpenPAI,Kubernetes),等等。 根据您配置的培训平台的能力,可以并行运行数百个 Trial 。 -* **灵活**:除了内置的算法,NNI 中还可以轻松集成自定义的超参调优算法,神经网络架构搜索算法,提前终止算法等等。 还可以将 NNI 连接到更多的训练平台上,如云中的虚拟机集群,Kubernetes 服务等等。 此外,NNI 还可以连接到外部环境中的特殊应用和模型上。 -* **高效**:NNI 在系统及算法级别上不断地进行优化。 例如:通过早期的反馈来加速调优过程。 - -下图显示了 NNI 的体系结构。 - - -.. raw:: html - -

-   <img src="https://user-images.githubusercontent.com/16907603/92089316-94147200-ee00-11ea-9944-bf3c4544257f.png" alt="drawing" width="700"/>

- - -主要概念 ------------- - - -* - *Experiment(实验)*: 表示一次任务,例如,寻找模型的最佳超参组合,或最好的神经网络架构等。 它由 Trial 和自动机器学习算法所组成。 - -* - *搜索空间*:是模型调优的范围。 例如,超参的取值范围。 - -* - *Configuration(配置)*:配置是来自搜索空间的实例,每个超参都会有特定的值。 - -* - *Trial*:是一次独立的尝试,它会使用某组配置(例如,一组超参值,或者特定的神经网络架构)。 Trial 会基于提供的配置来运行。 - -* - *Tuner(调优器)*:一种自动机器学习算法,会为下一个 Trial 生成新的配置。 新的 Trial 会使用这组配置来运行。 - -* - *Assessor(评估器)*:分析 Trial 的中间结果(例如,定期评估数据集上的精度),来确定 Trial 是否应该被提前终止。 - -* - *训练平台*:是 Trial 的执行环境。 根据 Experiment 的配置,可以是本机,远程服务器组,或其它大规模训练平台(如,OpenPAI,Kubernetes)。 - -Experiment 的运行过程为:Tuner 接收搜索空间并生成配置。 这些配置将被提交到训练平台,如本机,远程服务器组或训练集群。 执行的性能结果会被返回给 Tuner。 然后,再生成并提交新的配置。 - -每次 Experiment 执行时,用户只需要定义搜索空间,改动几行代码,就能利用 NNI 内置的 Tuner/Assessor 和训练平台来搜索最好的超参组合以及神经网络结构。 基本上分为三步: - -.. - - 步骤一:`定义搜索空间 `__ - - 步骤二:`改动模型代码 `__ - - 步骤三:`定义实验配置 `__ - - - -.. raw:: html - -

-   <img src="https://user-images.githubusercontent.com/23273522/51816627-5d13db80-2302-11e9-8f3e-627e260203d5.jpg" alt="drawing"/>

- - -可查看 `快速入门 `__ 来调优你的模型或系统。 - -核心功能 -------------- - -NNI 提供了并行运行多个实例以查找最佳参数组合的能力。 此功能可用于各种领域,例如,为深度学习模型查找最佳超参数,或查找具有真实数据的数据库和其他复杂系统的最佳配置。 - -NNI 还希望提供用于机器学习和深度学习的算法工具包,尤其是神经体系结构搜索(NAS)算法,模型压缩算法和特征工程算法。 - -超参调优 -^^^^^^^^^^^^^^^^^^^^^ - -这是 NNI 最核心、基本的功能,其中提供了许多流行的 `自动调优算法 `__ (即 Tuner) 以及 `提前终止算法 `__ (即 Assessor)。 可查看 `快速入门 `__ 来调优你的模型或系统。 基本上通过以上三步,就能开始 NNI Experiment。 - -通用 NAS 框架 -^^^^^^^^^^^^^^^^^^^^^ - -此 NAS 框架可供用户轻松指定候选的神经体系结构,例如,可以为单个层指定多个候选操作(例如,可分离的 conv、扩张 conv),并指定可能的跳过连接。 NNI 将自动找到最佳候选。 另一方面,NAS 框架为其他类型的用户(如,NAS 算法研究人员)提供了简单的接口,以实现新的 NAS 算法。 NAS 详情及用法参考 `这里 `__。 - -NNI 通过 Trial SDK 支持多种 one-shot(一次性) NAS 算法,如:ENAS、DARTS。 使用这些算法时,不需启动 NNI Experiment。 在 Trial 代码中加入算法,直接运行即可。 如果要调整算法中的超参数,或运行多个实例,可以使用 Tuner 并启动 NNI Experiment。 - -除了 one-shot NAS 外,NAS 还能以 NNI 模式运行,其中每个候选的网络结构都作为独立 Trial 任务运行。 在此模式下,与超参调优类似,必须启动 NNI Experiment 并为 NAS 选择 Tuner。 - -模型压缩 -^^^^^^^^^^^^^^^^^ - -NNI 提供了一个易于使用的模型压缩框架来压缩深度神经网络,压缩后的网络通常具有更小的模型尺寸和更快的推理速度, -模型性能也不会有明显的下降。 NNI 上的模型压缩包括剪枝和量化算法。 这些算法通过 NNI Trial SDK 提供 -。 可以直接在 Trial 代码中使用,并在不启动 NNI Experiment 的情况下运行 Trial 代码。 用户还可以使用 NNI 模型压缩框架集成自定义的剪枝和量化算法。 - -模型压缩的详细说明和算法可在 `这里 `__ 找到。 - -自动特征工程 -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -自动特征工程,可以为下游任务找到最有效的特征。 自动特征工程及其用法的详细说明可在 `这里 `__ 找到。 通过 NNI Trial SDK 支持,不必创建 NNI Experiment, 只需在 Trial 代码中加入内置的自动特征工程算法,然后直接运行 Trial 代码。 - -自动特征工程算法通常有一些超参。 如果要自动调整这些超参,可以利用 NNI 的超参数调优,即选择调优算法(即 Tuner)并启动 NNI Experiment。 - -了解更多信息 --------------------- - - -* `入门 `__ -* `如何为 NNI 调整代码? `__ -* `NNI 支持哪些 Tuner? `__ -* `如何自定义 Tuner? `__ -* `NNI 支持哪些 Assessor? `__ -* `如何自定义 Assessor? `__ -* `如何在本机上运行 Experiment? `__ -* `如何在多机上运行 Experiment? `__ -* `如何在 OpenPAI 上运行 Experiment? `__ -* `示例 `__ -* `NNI 上的神经网络架构搜索 `__ -* `NNI 上的自动模型压缩 `__ -* `NNI 上的自动特征工程 `__ diff --git a/docs/source/Tutorial/Contributing.rst b/docs/source/Tutorial/Contributing.rst deleted file mode 100644 index 6f2fd50217..0000000000 --- a/docs/source/Tutorial/Contributing.rst +++ /dev/null @@ -1,74 +0,0 @@ -Contributing to Neural Network Intelligence (NNI) -================================================= - -Great!! We are always on the lookout for more contributors to our code base. - -Firstly, if you are unsure or afraid of anything, just ask or submit the issue or pull request anyways. You won't be yelled at for giving your best effort. The worst that can happen is that you'll be politely asked to change something. We appreciate any sort of contributions and don't want a wall of rules to get in the way of that. - -However, for those individuals who want a bit more guidance on the best way to contribute to the project, read on. This document will cover all the points we're looking for in your contributions, raising your chances of quickly merging or addressing your contributions. - -Looking for a quickstart, get acquainted with our `Get Started `__ guide. - -There are a few simple guidelines that you need to follow before providing your hacks. - -Raising Issues --------------- - -When raising issues, please specify the following: - - -* Setup details needs to be filled as specified in the issue template clearly for the reviewer to check. -* A scenario where the issue occurred (with details on how to reproduce it). -* Errors and log messages that are displayed by the software. -* Any other details that might be useful. - -Submit Proposals for New Features ---------------------------------- - - -* - There is always something more that is required, to make it easier to suit your use-cases. 
Feel free to join the discussion on new features or raise a PR with your proposed change. - -* - Fork the repository under your own github handle. After cloning the repository. Add, commit, push and sqaush (if necessary) the changes with detailed commit messages to your fork. From where you can proceed to making a pull request. - -Contributing to Source Code and Bug Fixes ------------------------------------------ - -Provide PRs with appropriate tags for bug fixes or enhancements to the source code. Do follow the correct naming conventions and code styles when you work on and do try to implement all code reviews along the way. - -If you are looking for How to develop and debug the NNI source code, you can refer to `How to set up NNI developer environment doc <./SetupNniDeveloperEnvironment.rst>`__ file in the ``docs`` folder. - -Similarly for `Quick Start `__. For everything else, refer to `NNI Home page `__. - -Solve Existing Issues ---------------------- - -Head over to `issues `__ to find issues where help is needed from contributors. You can find issues tagged with 'good-first-issue' or 'help-wanted' to contribute in. - -A person looking to contribute can take up an issue by claiming it as a comment/assign their Github ID to it. In case there is no PR or update in progress for a week on the said issue, then the issue reopens for anyone to take up again. We need to consider high priority issues/regressions where response time must be a day or so. - -Code Styles & Naming Conventions --------------------------------- - -* We follow `PEP8 `__ for Python code and naming conventions, do try to adhere to the same when making a pull request or making a change. One can also take the help of linters such as ``flake8`` or ``pylint`` -* We also follow `NumPy Docstring Style `__ for Python Docstring Conventions. During the `documentation building `__\ , we use `sphinx.ext.napoleon `__ to generate Python API documentation from Docstring. -* For docstrings, please refer to `numpydoc docstring guide `__ and `pandas docstring guide `__ - - * For function docstring, **description**, **Parameters**, and **Returns** **Yields** are mandatory. - * For class docstring, **description**, **Attributes** are mandatory. - * For docstring to describe ``dict``, which is commonly used in our hyper-param format description, please refer to `Internal Guideline on Writing Standards `__ - -Documentation -------------- - -Our documentation is built with :githublink:`sphinx `. - -* Before submitting the documentation change, please **build homepage locally**: ``cd docs/en_US && make html``, then you can see all the built documentation webpage under the folder ``docs/en_US/_build/html``. It's also highly recommended taking care of **every WARNING** during the build, which is very likely the signal of a **deadlink** and other annoying issues. - -* - For links, please consider using **relative paths** first. However, if the documentation is written in reStructuredText format, and: - - - * It's an image link which needs to be formatted with embedded html grammar, please use global URL like ``https://user-images.githubusercontent.com/44491713/51381727-e3d0f780-1b4f-11e9-96ab-d26b9198ba65.png``, which can be automatically generated by dragging picture onto `Github Issue `__ Box. - * It cannot be re-formatted by sphinx, such as source code, please use its global URL. For source code that links to our github repo, please use URLs rooted at ``https://github.com/Microsoft/nni/tree/master/`` (:githublink:`mnist.py ` for example). 
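The docstring conventions referenced in the guide above are easiest to see by example. As a minimal sketch for illustration only (the function and its parameters are hypothetical and not part of this patch), a NumPy-style docstring of the kind the guide asks for looks like:

.. code-block:: python

   import torch

   def clip_gradients(parameters, max_norm=1.0):
       """Clip gradients of ``parameters`` in place.

       Parameters
       ----------
       parameters : iterable of torch.nn.Parameter
           Parameters whose gradients will be clipped.
       max_norm : float
           Maximum allowed L2 norm of the combined gradients.

       Returns
       -------
       float
           Total gradient norm before clipping.
       """
       # clip_grad_norm_ modifies gradients in place and returns the total norm
       return torch.nn.utils.clip_grad_norm_(parameters, max_norm).item()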
diff --git a/docs/source/Tutorial/SetupNniDeveloperEnvironment.rst b/docs/source/Tutorial/SetupNniDeveloperEnvironment.rst deleted file mode 100644 index b1adc58137..0000000000 --- a/docs/source/Tutorial/SetupNniDeveloperEnvironment.rst +++ /dev/null @@ -1,68 +0,0 @@ -Setup NNI development environment -================================= - -NNI development environment supports Ubuntu 1604 (or above), and Windows 10 with Python3 64bit. - -Installation ------------- - -1. Clone source code -^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: bash - - git clone https://github.com/Microsoft/nni.git - -Note, if you want to contribute code back, it needs to fork your own NNI repo, and clone from there. - -2. Install from source code -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: bash - - python3 -m pip install -U -r dependencies/setup.txt - python3 -m pip install -r dependencies/develop.txt - python3 setup.py develop - -This installs NNI in `development mode `__, -so you don't need to reinstall it after edit. - -3. Check if the environment is ready -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Now, you can try to start an experiment to check if your environment is ready. -For example, run the command - -.. code-block:: bash - - nnictl create --config examples/trials/mnist-pytorch/config.yml - -And open WebUI to check if everything is OK - -4. Reload changes -^^^^^^^^^^^^^^^^^ - -Python -****** - -Nothing to do, the code is already linked to package folders. - -TypeScript (Linux and macOS) -**************************** - -* If ``ts/nni_manager`` is changed, run ``yarn watch`` under this folder. It will watch and build code continually. The ``nnictl`` need to be restarted to reload NNI manager. -* If ``ts/webui`` is changed, run ``yarn dev``\ , which will run a mock API server and a webpack dev server simultaneously. Use ``EXPERIMENT`` environment variable (e.g., ``mnist-tfv1-running``\ ) to specify the mock data being used. Built-in mock experiments are listed in ``src/webui/mock``. An example of the full command is ``EXPERIMENT=mnist-tfv1-running yarn dev``. - -TypeScript (Windows) -******************** - -Currently you must rebuild TypeScript modules with `python3 setup.py build_ts` after edit. - -5. Submit Pull Request -^^^^^^^^^^^^^^^^^^^^^^ - -All changes are merged to master branch from your forked repo. The description of Pull Request must be meaningful, and useful. - -We will review the changes as soon as possible. Once it passes review, we will merge it to master branch. - -For more contribution guidelines and coding styles, you can refer to the `contributing document `__. diff --git a/docs/source/contribution.rst b/docs/source/contribution.rst deleted file mode 100644 index 6131b37a86..0000000000 --- a/docs/source/contribution.rst +++ /dev/null @@ -1,7 +0,0 @@ -############################### -Contribute to NNI -############################### - -.. toctree:: - Development Setup<./Tutorial/SetupNniDeveloperEnvironment> - Contribution Guide<./Tutorial/Contributing> \ No newline at end of file diff --git a/docs/source/contribution_zh.rst b/docs/source/contribution_zh.rst deleted file mode 100644 index 71584e9fb7..0000000000 --- a/docs/source/contribution_zh.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. 24da49b25d3d36c476a69aceb825cb94 - -############################### -贡献代码 -############################### - -.. 
toctree:: - 设置开发环境<./Tutorial/SetupNniDeveloperEnvironment> - 贡献指南<./Tutorial/Contributing> \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index bb40524d17..ed10c0426a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -20,7 +20,6 @@ Neural Network Intelligence :caption: Advanced Materials :hidden: - Overview Auto (Hyper-parameter) Tuning Neural Architecture Search Model Compression @@ -41,7 +40,7 @@ Neural Network Intelligence Use Cases and Solutions Research and Publications FAQ - How to Contribute + Contribution Guide Change Log diff --git a/docs/source/index_zh.rst b/docs/source/index_zh.rst index e57bd56a1b..b083e591af 100644 --- a/docs/source/index_zh.rst +++ b/docs/source/index_zh.rst @@ -1,4 +1,4 @@ -.. 1c1500ed177d6b4badecd72037a24a30 +.. ff683903b57318e8baa425ef7a2afaf1 ########################### Neural Network Intelligence @@ -10,7 +10,6 @@ Neural Network Intelligence :titlesonly: :hidden: - 概述 安装 入门 教程 @@ -22,7 +21,7 @@ Neural Network Intelligence 示例与解决方案 研究和出版物 常见问题 - 如何贡献 + 如何贡献 更改日志 diff --git a/docs/source/Overview.rst b/docs/source/notes/architecture_overview.rst similarity index 55% rename from docs/source/Overview.rst rename to docs/source/notes/architecture_overview.rst index a4fe5d5dd8..831212b6df 100644 --- a/docs/source/Overview.rst +++ b/docs/source/notes/architecture_overview.rst @@ -1,69 +1,49 @@ -Overview -======== +:orphan: -NNI (Neural Network Intelligence) is a toolkit to help users design and tune machine learning models (e.g., hyperparameters), neural network architectures, or complex system's parameters, in an efficient and automatic way. NNI has several appealing properties: ease-of-use, scalability, flexibility, and efficiency. +Architecture Overview +===================== +NNI (Neural Network Intelligence) is a toolkit to help users design and tune machine learning models (e.g., hyperparameters), neural network architectures, or complex system's parameters, in an efficient and automatic way. NNI has several appealing properties: ease-of-use, scalability, flexibility, and efficiency. -* **Ease-of-use**\ : NNI can be easily installed through python pip. Only several lines need to be added to your code in order to use NNI's power. You can use both the commandline tool and WebUI to work with your experiments. -* **Scalability**\ : Tuning hyperparameters or the neural architecture often demands a large number of computational resources, while NNI is designed to fully leverage different computation resources, such as remote machines, training platforms (e.g., OpenPAI, Kubernetes). Hundreds of trials could run in parallel by depending on the capacity of your configured training platforms. -* **Flexibility**\ : Besides rich built-in algorithms, NNI allows users to customize various hyperparameter tuning algorithms, neural architecture search algorithms, early stopping algorithms, etc. Users can also extend NNI with more training platforms, such as virtual machines, kubernetes service on the cloud. Moreover, NNI can connect to external environments to tune special applications/models on them. -* **Efficiency**\ : We are intensively working on more efficient model tuning on both the system and algorithm level. For example, we leverage early feedback to speedup the tuning procedure. +* **Ease-of-use**: NNI can be easily installed through python pip. Only several lines need to be added to your code in order to use NNI's power. You can use both the commandline tool and WebUI to work with your experiments. 
+* **Scalability**: Tuning hyperparameters or the neural architecture often demands a large number of computational resources, while NNI is designed to fully leverage different computation resources, such as remote machines, training platforms (e.g., OpenPAI, Kubernetes). Hundreds of trials could run in parallel by depending on the capacity of your configured training platforms.
+* **Flexibility**: Besides rich built-in algorithms, NNI allows users to customize various hyperparameter tuning algorithms, neural architecture search algorithms, early stopping algorithms, etc. Users can also extend NNI with more training platforms, such as virtual machines, kubernetes service on the cloud. Moreover, NNI can connect to external environments to tune special applications/models on them.
+* **Efficiency**: We are intensively working on more efficient model tuning on both the system and algorithm level. For example, we leverage early feedback to speedup the tuning procedure.
 
 The figure below shows high-level architecture of NNI.
 
-.. raw:: html
-

-   <img src="https://user-images.githubusercontent.com/16907603/92089316-94147200-ee00-11ea-9944-bf3c4544257f.png" alt="drawing" width="700"/>

- +.. image:: https://user-images.githubusercontent.com/16907603/92089316-94147200-ee00-11ea-9944-bf3c4544257f.png + :width: 700 Key Concepts ------------ +* *Experiment*: One task of, for example, finding out the best hyperparameters of a model, finding out the best neural network architecture, etc. It consists of trials and AutoML algorithms. -* - *Experiment*\ : One task of, for example, finding out the best hyperparameters of a model, finding out the best neural network architecture, etc. It consists of trials and AutoML algorithms. - -* - *Search Space*\ : The feasible region for tuning the model. For example, the value range of each hyperparameter. +* *Search Space*: The feasible region for tuning the model. For example, the value range of each hyperparameter. -* - *Configuration*\ : An instance from the search space, that is, each hyperparameter has a specific value. +* *Configuration*: An instance from the search space, that is, each hyperparameter has a specific value. -* - *Trial*\ : An individual attempt at applying a new configuration (e.g., a set of hyperparameter values, a specific neural architecture, etc.). Trial code should be able to run with the provided configuration. +* *Trial*: An individual attempt at applying a new configuration (e.g., a set of hyperparameter values, a specific neural architecture, etc.). Trial code should be able to run with the provided configuration. -* - *Tuner*\ : An AutoML algorithm, which generates a new configuration for the next try. A new trial will run with this configuration. +* *Tuner*: An AutoML algorithm, which generates a new configuration for the next try. A new trial will run with this configuration. -* - *Assessor*\ : Analyze a trial's intermediate results (e.g., periodically evaluated accuracy on test dataset) to tell whether this trial can be early stopped or not. +* *Assessor*: Analyze a trial's intermediate results (e.g., periodically evaluated accuracy on test dataset) to tell whether this trial can be early stopped or not. -* - *Training Platform*\ : Where trials are executed. Depending on your experiment's configuration, it could be your local machine, or remote servers, or large-scale training platform (e.g., OpenPAI, Kubernetes). +* *Training Platform*: Where trials are executed. Depending on your experiment's configuration, it could be your local machine, or remote servers, or large-scale training platform (e.g., OpenPAI, Kubernetes). Basically, an experiment runs as follows: Tuner receives search space and generates configurations. These configurations will be submitted to training platforms, such as the local machine, remote machines, or training clusters. Their performances are reported back to Tuner. Then, new configurations are generated and submitted. For each experiment, the user only needs to define a search space and update a few lines of code, and then leverage NNI built-in Tuner/Assessor and training platforms to search the best hyperparameters and/or neural architecture. There are basically 3 steps: -.. - - Step 1: `Define search space `__ +* Step 1: `Define search space `__ - Step 2: `Update model codes `__ +* Step 2: `Update model codes `__ - Step 3: `Define Experiment `__ - - - -.. raw:: html - -

-   <img src="https://user-images.githubusercontent.com/23273522/51816627-5d13db80-2302-11e9-8f3e-627e260203d5.jpg" alt="drawing"/>

+* Step 3: `Define Experiment `__ +.. image:: https://user-images.githubusercontent.com/23273522/51816627-5d13db80-2302-11e9-8f3e-627e260203d5.jpg For more details about how to run an experiment, please refer to `Get Started `__. @@ -103,21 +83,3 @@ Automatic Feature Engineering Automatic feature engineering is for users to find the best features for their tasks. A detailed description of automatic feature engineering and its usage can be found `here `__. It is supported through NNI trial SDK, which means you do not have to create an NNI experiment. Instead, simply import a built-in auto-feature-engineering algorithm in your trial code and directly run your trial code. The auto-feature-engineering algorithms usually have a bunch of hyperparameters themselves. If you want to automatically tune those hyperparameters, you can leverage hyperparameter tuning of NNI, that is, choose a tuning algorithm (i.e., tuner) and start an NNI experiment for it. - -Learn More ----------- - - -* `Get started `__ -* `How to adapt your trial code on NNI? `__ -* `What are tuners supported by NNI? `__ -* `How to customize your own tuner? `__ -* `What are assessors supported by NNI? `__ -* `How to customize your own assessor? `__ -* `How to run an experiment on local? `__ -* `How to run an experiment on multiple machines? `__ -* `How to run an experiment on OpenPAI? `__ -* `Examples `__ -* `Neural Architecture Search on NNI `__ -* `Model Compression on NNI `__ -* `Automatic feature engineering on NNI `__ diff --git a/docs/source/notes/contributing.rst b/docs/source/notes/contributing.rst new file mode 100644 index 0000000000..d6095f8f65 --- /dev/null +++ b/docs/source/notes/contributing.rst @@ -0,0 +1,329 @@ +Contribution Guide +================== + +Great! We are always on the lookout for more contributors to our code base. + +Firstly, if you are unsure or afraid of anything, just ask or submit the issue or pull request anyways. You won't be yelled at for giving your best effort. The worst that can happen is that you'll be politely asked to change something. We appreciate any sort of contributions and don't want a wall of rules to get in the way of that. + +However, for those individuals who want a bit more guidance on the best way to contribute to the project, read on. This document will cover all the points we're looking for in your contributions, raising your chances of quickly merging or addressing your contributions. + +There are a few simple guidelines that you need to follow before providing your hacks. + +Bug Reports and Feature Requests +-------------------------------- + +If you encountered a problem when using NNI, or have an idea for a new feature, your feedbacks are always welcome. Here are some possible channels: + +* `File an issue `_ on GitHub. +* Open or participate in a `discussion `_. +* Discuss on the NNI `Gitter `_ in NNI. +* Join IM discussion groups: + + .. list-table:: + :widths: 50 50 + :header-rows: 1 + + * - Gitter + - WeChat + * - .. image:: https://user-images.githubusercontent.com/39592018/80665738-e0574a80-8acc-11ea-91bc-0836dc4cbf89.png + - .. image:: https://github.com/scarlett2018/nniutil/raw/master/wechat.png + +Looking for an existing issue +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Before you create a new issue, please do a search in `open issues `_ to see if the issue or feature request has already been filed. + +Be sure to scan through the `most popular `_ feature requests. + +If you find your issue already exists, make relevant comments and add your `reaction `_. 
Use a reaction in place of a "+1" comment: + +* 👍 - upvote +* 👎 - downvote + +If you cannot find an existing issue that describes your bug or feature, create a new issue following the guidelines below. + +Writing good bug reports or feature requests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* File a single issue per problem and feature request. Do not enumerate multiple bugs or feature requests in the same issue. + +* Provide as much information as you think might relevant to the context (thinking the issue is assigning to you, what kinds of info you will need to debug it!!!). To give you a general idea about what kinds of info are useful for developers to dig out the issue, we had provided issue template for you. + +* Once you had submitted an issue, be sure to follow it for questions and discussions. + +* Once the bug is fixed or feature is addressed, be sure to close the issue. + +Writing code +------------ + +There is always something more that is required, to make it easier to suit your use-cases. +Before starting to write code, we recommend checking for `issues `_ on GitHub or open a new issue to initiate a discussion. There could be cases where people are already working on a fix, or similar features have already been under discussion. + +To contribute code, you first need to find the NNI code repo located on `GitHub `_. Firstly, fork the repository under your own GitHub handle. After cloning the repository, add, commit, push and squash (if necessary) the changes with detailed commit messages to your fork. From where you can proceed to making a pull request. The pull request will then be reviewed by our core maintainers before merging into master branch. `Here `_ is a step-by-step guide for this process. + +Contributions to NNI should follow our code of conduct. Please see details :ref:`here `. + +Find the code snippet that concerns you +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The NNI repository is large code-base. High-level speaking, it can be decomposed into several core parts: + +* ``nni``: the core Python package that contains most features of hyper-parameter tuner, neural architecture search, model compression. +* ``ts``: contains ``nni_manager`` that manages experiments and training services, and ``webui`` for visualization. +* ``pipelines`` and ``test``: unit test and integration test, alongside their configurations. + +See :doc:`./architecture_overview` if you are interested in details. + +.. _get-started-dev: + +Get started with development +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +NNI development environment supports Ubuntu 1604 (or above), and Windows 10 with Python 3.7+ (documentation build requires Python 3.8+). We recommend using `conda `_ on Windows. + +1. Fork the NNI's GitHub repository and clone the forked repository to your machine. + + .. code-block:: bash + + git clone https://github.com//nni.git + +2. Create a new working branch. Use any name you like. + + .. code-block:: bash + + cd nni + git checkout -b feature-xyz + +3. Install NNI from source code if you need to modify the source code, and test it. + + .. code-block:: bash + + python3 -m pip install -U -r dependencies/setup.txt + python3 -m pip install -r dependencies/develop.txt + python3 setup.py develop + + This installs NNI in `development mode `_, + so you don't need to reinstall it after edit. + +4. Try to start an experiment to check if your environment is ready. For example, run the command + + .. 
code-block:: bash + + nnictl create --config examples/trials/mnist-pytorch/config.yml + + And open WebUI to check if everything is OK. Or check the version of installed NNI, + + .. code-block:: python + + >>> import nni + >>> nni.__version__ + '999.dev0' + + .. note:: Please don't run test under the same folder where the NNI repository is located. As the repository is probably also called ``nni``, it could import the wrong ``nni`` package. + +5. Write your code along with tests to verify whether the bug is fixed, or the feature works as expected. + +6. Reload changes. For Python, nothing needs to be done, because the code is already linked to package folders. For TypeScript on Linux and MacOS, + + * If ``ts/nni_manager`` is changed, run ``yarn watch`` under this folder. It will watch and build code continually. The ``nnictl`` need to be restarted to reload NNI manager. + * If ``ts/webui`` is changed, run ``yarn dev``\ , which will run a mock API server and a webpack dev server simultaneously. Use ``EXPERIMENT`` environment variable (e.g., ``mnist-tfv1-running``\ ) to specify the mock data being used. Built-in mock experiments are listed in ``src/webui/mock``. An example of the full command is ``EXPERIMENT=mnist-tfv1-running yarn dev``. + + For TypeScript on Windows, currently you must rebuild TypeScript modules with `python3 setup.py build_ts` after edit. + +7. Commit and push your changes, and submit your pull request! + +Coding Tips +----------- + +We expect all contributors to respect the following coding styles and naming conventions upon their contribution. + +Python +^^^^^^ + +* We follow `PEP8 `__ for Python code and naming conventions, do try to adhere to the same when making a pull request. Our pull request has a mandatory code scan with ``pylint`` and ``flake8``. + + .. note:: To scan your own code locally, run + + .. code-block:: bash + + python -m pylint --rcfile pylintrc nni + + .. tip:: One can also take the help of auto-format tools such as `autopep8 `_, which will automatically resolve most of the styling issues. + +* We recommend documenting all the methods and classes in your code. Follow `NumPy Docstring Style `__ for Python Docstring Conventions. + + * For function docstring, **description**, **Parameters**, and **Returns** are mandatory. + * For class docstring, **description** is mandatory. Optionally **Parameters** and **Attributes**. The parameters of ``__init__`` should be documented in the docstring of class. + * For docstring to describe ``dict``, which is commonly used in our hyper-parameter format description, please refer to `Internal Guideline on Writing Standards `_. + + .. tip:: Basically, you can use :ref:`ReStructuredText ` syntax in docstrings, without some exceptions. For example, custom headings are not allowed in docstrings. + +TypeScript +^^^^^^^^^^ + +TypeScript code checks can be done with, + +.. code-block:: bash + + # for nni manager + cd ts/nni_manager + yarn eslint + + # for webui + cd ts/webui + yarn sanity-check + +Tests +----- + +When a new feature is added or a bug is fixed, tests are highly recommended to make sure that the fix is effective or the feature won't break in future. There are two types of tests in NNI: + +* Unit test (**UT**): each test targets at a specific class / function / module. +* Integration test (**IT**): each test is an end-to-end example / demo. + +Unit test (Python) +^^^^^^^^^^^^^^^^^^ + +Python UT are located in ``test/ut/`` folder. We use `pytest `_ to launch the tests, and the working directory is ``test/ut/``. 
+ +.. tip:: pytest can be used on a single file or a single test function. + + .. code-block:: bash + + pytest sdk/test_tuner.py + pytest sdk/test_tuner.py::test_tpe + +Unit test (TypeScript) +^^^^^^^^^^^^^^^^^^^^^^ + +TypeScript UT are paired with TypeScript code. Use ``yarn test`` to run them. + +Integration test +^^^^^^^^^^^^^^^^ + +The integration tests can be found in ``pipelines/`` folder. + +The integration tests are run on Azure DevOps platform on a daily basis, in order to make sure that our examples and training service integrations work properly. However, for critical changes that have impacts on the core functionalities of NNI, we recommend to `trigger the pipeline on the pull request branch `_. + +The integration tests won't be automatically triggered on pull requests. You might need to contact the core developers to help you trigger the tests. + +Documentation +------------- + +Build and check documentation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Our documentation is located under ``docs/`` folder. The following command can be used to build the documentation. + +.. code-block:: bash + + cd docs + make html + +.. note:: + + If you experience issues in building documentation, and see errors like: + + * ``Could not import extension xxx (exception: No module named 'xxx')`` : please check your development environment and make sure dependencies have been properly installed: :ref:`get-started-dev`. + * ``unsupported pickle protocol: 5``: please upgrade to Python 3.8. + * ``autodoc: No module named 'xxx'``: some dependencies in ``dependencies/`` are not installed. In this case, documentation can be still mostly successfully built, but some API reference could be missing. + +It's also highly recommended taking care of **every WARNING** during the build, which is very likely the signal of a **deadlink** and other annoying issues. Our code check will also make sure that the documentation build completes with no warning. + +The built documentation can be found in ``docs/build/html`` folder. + +.. attention:: Always use your web browser to check the documentation before committing your change. + +.. tip:: `Live Server `_ is a great extension if you are looking for a static-files server to serve contents in ``docs/build/html``. + +Writing new documents +^^^^^^^^^^^^^^^^^^^^^ + +.. |link_example| raw:: html + + `Link text <https://domain.invalid/>`_ + +.. |link_example_2| raw:: html + + `Link text <https://domain.invalid/>`__ + +.. |link_example_3| raw:: html + + :doc:`./relative/to/my_doc` + +.. |githublink_example| raw:: html + + :githublink:`path/to/file.ext` + +.. |githublink_example_2| raw:: html + + :githublink:`text <path/to/file.ext>` + +.. _restructuredtext-intro: + +`ReStructuredText `_ is our documentation language. Please find the reference of RST `here `__. + +.. tip:: Sphinx has `an excellent cheatsheet of rst `_ which contains almost everything you might need to know to write a elegant document. + +**Dealing with sections.** ``=`` for sections. ``-`` for subsections. ``^`` for subsubsections. ``"`` for paragraphs. + +**Dealing with images.** Images should be put into ``docs/img`` folder. Then, reference the image in the document with relative links. For example, ``.. image:: ../../img/example.png``. + +**Dealing with codes.** We recommend using ``.. code-block:: python`` to start a code block. The ``python`` here annotates the syntax highlighting. + +**Dealing with links.** Use |link_example_3| for links to another doc (no suffix like ``.rst``). 
To reference a specific section, please use ``:ref:`` (see `Cross-referencing arbitrary locations `_). For general links that ``:doc:`` and ``:ref:`` can't handle, you can also use |link_example| for inline web links. Note that use one underline might cause `"duplicated target name" error `_ when multiple targets share the same name. In that case, use double-underline to avoid the error: |link_example_2|. + +Other than built-in directives provided by Sphinx, we also provide some custom directives: + +* ``.. cardlinkitem::``: A tutorial card, useful in :doc:`../tutorials`. +* |githublink_example| or |githublink_example_2|: reference a file on the GitHub. Linked to the same commit id as where the documentation is built. + +Writing new tutorials +^^^^^^^^^^^^^^^^^^^^^ + +Our tutorials are powered by `sphinx-gallery `. Sphinx-gallery is an extension that builds an HTML gallery of examples from any set of Python scripts. + +To contribute a new tutorial, here are the steps to follow: + +1. Create a notebook styled python file. If you want it executed while inserted into documentation, save the file under ``examples/tutorials/``. If your tutorial contains other auxiliary scripts which are not intended to be included into documentation, save them under ``examples/tutorials/scripts/``. + + .. tip:: The syntax to write a "notebook styled python file" is very simple. In essence, you only need to write a slightly well formatted python file. Here is a useful guide of `how to structure your Python scripts for Sphinx-Gallery `_. + +2. Put the tutorials into ``docs/source/tutorials.rst``. You should add it both in ``toctree`` (to make it appear in the sidebar content table), and ``cardlinkitem`` (to create a card link), and specify the appropriate ``header``, ``description``, ``link``, ``image``, ``background`` (for image) and ``tags``. + + ``link`` are the generated link, which is usually ``tutorials/.html``. Some useful images can be found in ``docs/img/thumbnails``, but you can always use your own. Available background colors are: ``red``, ``pink``, ``purple``, ``deep-purple``, ``blue``, ``light-blue``, ``cyan``, ``teal``, ``green``, ``deep-orange``, ``brown``, ``indigo``. + + In case you prefer to write your tutorial in jupyter, you can use `this script `_ to convert the notebook to python file. After conversion and addition to the project, please make sure the sections headings etc are in logical order. + +3. Build the tutorials. Since some of the tutorials contain complex AutoML examples, it's very inefficient to build them over and over again. Therefore, we cache the built tutorials in ``docs/source/tutorials``, so that the unchanged tutorials won't be rebuilt. To trigger the build, run ``make html``. This will execute the tutorials and convert the scripts into HTML files. How long it takes depends on your tutorial. As ``make html`` is not very debug-friendly, we suggest making the script runnable by itself before using this building tool. + +.. note:: + + Some useful HOW-TOs in writing new tutorials: + + * `How to force rebuilding one tutorial `_. + * `How to add images to notebooks `_. + * `How to reference a tutorial in documentation `_. + +Chinese translation +^^^^^^^^^^^^^^^^^^^ + +We only maintain `a partial set of documents `_ with Chinese translation. If you intend to contribute more, follow the steps: + +1. Add a ``xxx_zh.rst`` in the same folder where ``xxx.rst`` exists. +2. 
Run ``python tools/chineselink.py`` under ``docs`` folder, to generate a hash string in your created ``xxx_zh.rst``. +3. Don't delete the hash string, add your translation after it. + +In case you modify an English document with Chinese translation already exists, you also need to run ``python tools/chineselink.py`` first to update the hash string, and update the Chinese translation contents accordingly. + +.. _code-of-conduct: + +Code of Conduct +--------------- + +This project has adopted the `Microsoft Open Source Code of Conduct `_. +For more information see the `Code of Conduct FAQ `_ or contact `opencode@microsoft.com `_ with any additional questions or comments. + +Most contributions require you to agree to a Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com. + +When you submit a pull request, a CLA bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. From 268215ad58f113dddd13091f9b109fc2bd286d3e Mon Sep 17 00:00:00 2001 From: J-shang <33053116+J-shang@users.noreply.github.com> Date: Mon, 28 Feb 2022 13:59:49 +0800 Subject: [PATCH 03/14] Fix seed when test apoz pruner (#4580) --- pipelines/fast-test.yml | 2 +- .../v2/test_iterative_pruner_torch.py | 36 ++++++++++++---- test/ut/compression/v2/test_pruner_torch.py | 42 +++++++++++++------ 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/pipelines/fast-test.yml b/pipelines/fast-test.yml index 1b382a07bb..ca4d7f6f66 100644 --- a/pipelines/fast-test.yml +++ b/pipelines/fast-test.yml @@ -177,7 +177,7 @@ stages: - job: windows pool: vmImage: windows-latest - timeoutInMinutes: 70 + timeoutInMinutes: 75 steps: - template: templates/install-dependencies.yml diff --git a/test/ut/compression/v2/test_iterative_pruner_torch.py b/test/ut/compression/v2/test_iterative_pruner_torch.py index 7e3209073b..3f3d07bf35 100644 --- a/test/ut/compression/v2/test_iterative_pruner_torch.py +++ b/test/ut/compression/v2/test_iterative_pruner_torch.py @@ -4,6 +4,7 @@ import random import unittest +import numpy import torch import torch.nn.functional as F @@ -105,6 +106,17 @@ def test_simulated_annealing_pruner(self): sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list) assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82 + def test_amc_pruner(self): + model = TorchModel() + config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.5, 'max_sparsity_per_layer': 0.8}] + dummy_input = torch.rand(10, 1, 28, 28) + ddpg_params = {'hidden1': 300, 'hidden2': 300, 'lr_c': 1e-3, 'lr_a': 1e-4, 'warmup': 5, 'discount': 1., + 'bsize': 64, 'rmsize': 100, 'window_length': 1, 'tau': 0.01, 'init_delta': 0.5, 'delta_decay': 0.99, + 'max_episode_length': 1e9, 'epsilon': 50000} + pruner = AMCPruner(10, model, config_list, dummy_input, evaluator, finetuner=finetuner, ddpg_params=ddpg_params, target='flops', log_dir='../../../logs') + pruner.compress() + +class FixSeedPrunerTestCase(unittest.TestCase): def test_auto_compress_pruner(self): model = TorchModel() config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.8}] @@ -126,15 +138,21 @@ def test_auto_compress_pruner(self): print(sparsity_list) assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82 - def test_amc_pruner(self): - model = 
TorchModel() - config_list = [{'op_types': ['Conv2d'], 'total_sparsity': 0.5, 'max_sparsity_per_layer': 0.8}] - dummy_input = torch.rand(10, 1, 28, 28) - ddpg_params = {'hidden1': 300, 'hidden2': 300, 'lr_c': 1e-3, 'lr_a': 1e-4, 'warmup': 5, 'discount': 1., - 'bsize': 64, 'rmsize': 100, 'window_length': 1, 'tau': 0.01, 'init_delta': 0.5, 'delta_decay': 0.99, - 'max_episode_length': 1e9, 'epsilon': 50000} - pruner = AMCPruner(10, model, config_list, dummy_input, evaluator, finetuner=finetuner, ddpg_params=ddpg_params, target='flops', log_dir='../../../logs') - pruner.compress() + def setUp(self) -> None: + # fix seed in order to solve the random failure of ut + random.seed(1024) + numpy.random.seed(1024) + torch.manual_seed(1024) + + def tearDown(self) -> None: + # reset seed + import time + now = int(time.time() * 100) + random.seed(now) + seed = random.randint(0, 2 ** 32 - 1) + random.seed(seed) + numpy.random.seed(seed) + torch.manual_seed(seed) if __name__ == '__main__': unittest.main() diff --git a/test/ut/compression/v2/test_pruner_torch.py b/test/ut/compression/v2/test_pruner_torch.py index 7e462447ae..e4c313f39b 100644 --- a/test/ut/compression/v2/test_pruner_torch.py +++ b/test/ut/compression/v2/test_pruner_torch.py @@ -1,8 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. +import random import unittest +import numpy import torch import torch.nn.functional as F @@ -122,18 +124,6 @@ def test_slim_pruner(self): sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list) assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82 - def test_activation_apoz_rank_pruner(self): - model = TorchModel() - config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}] - pruner = ActivationAPoZRankPruner(model=model, config_list=config_list, trainer=trainer, - traced_optimizer=get_optimizer(model), criterion=criterion, training_batches=5, - activation='relu', mode='dependency_aware', - dummy_input=torch.rand(10, 1, 28, 28)) - pruned_model, masks = pruner.compress() - pruner._unwrap_model() - sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list) - assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82 - def test_activation_mean_rank_pruner(self): model = TorchModel() config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}] @@ -177,6 +167,34 @@ def test_movement_pruner(self): sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list) assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82 +class FixSeedPrunerTestCase(unittest.TestCase): + def test_activation_apoz_rank_pruner(self): + model = TorchModel() + config_list = [{'op_types': ['Conv2d'], 'sparsity': 0.8}] + pruner = ActivationAPoZRankPruner(model=model, config_list=config_list, trainer=trainer, + traced_optimizer=get_optimizer(model), criterion=criterion, training_batches=5, + activation='relu', mode='dependency_aware', + dummy_input=torch.rand(10, 1, 28, 28)) + pruned_model, masks = pruner.compress() + pruner._unwrap_model() + sparsity_list = compute_sparsity_mask2compact(pruned_model, masks, config_list) + assert 0.78 < sparsity_list[0]['total_sparsity'] < 0.82 + + def setUp(self) -> None: + # fix seed in order to solve the random failure of ut + random.seed(1024) + numpy.random.seed(1024) + torch.manual_seed(1024) + + def tearDown(self) -> None: + # reset seed + import time + now = int(time.time() * 100) + random.seed(now) + seed = random.randint(0, 2 ** 32 - 1) + random.seed(seed) + numpy.random.seed(seed) + torch.manual_seed(seed) if 
__name__ == '__main__': unittest.main() From fa5127b38b900c2695ec98976b4b127a1d034be1 Mon Sep 17 00:00:00 2001 From: Weidan Kong <42156564+weidankong@users.noreply.github.com> Date: Mon, 28 Feb 2022 00:44:25 -0800 Subject: [PATCH 04/14] Bug: reduce liveEnvironmentsCount after environment is stopped (#4593) --- ts/nni_manager/training_service/reusable/trialDispatcher.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/ts/nni_manager/training_service/reusable/trialDispatcher.ts b/ts/nni_manager/training_service/reusable/trialDispatcher.ts index 4efc4f2b7a..13064435ff 100644 --- a/ts/nni_manager/training_service/reusable/trialDispatcher.ts +++ b/ts/nni_manager/training_service/reusable/trialDispatcher.ts @@ -507,6 +507,7 @@ class TrialDispatcher implements TrainingService { throw new Error(`${environment.id} does not has environment service!`); } await environment.environmentService.stopEnvironment(environment); + liveEnvironmentsCount--; continue; } From 6b828681ba17547506a1b197cf8b2dac68df8a8f Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Mon, 28 Feb 2022 16:44:46 +0800 Subject: [PATCH 05/14] Up-to-date with send/recv API change of tianshou (#4586) --- nni/retiarii/strategy/_rl_impl.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/nni/retiarii/strategy/_rl_impl.py b/nni/retiarii/strategy/_rl_impl.py index ce51487a77..fb9bab0bec 100644 --- a/nni/retiarii/strategy/_rl_impl.py +++ b/nni/retiarii/strategy/_rl_impl.py @@ -43,6 +43,17 @@ def __getattr__(self, key): def reset(self): return self.env.reset() + def send(self, action): + # for tianshou >= 0.4.6 + if action is None: + self.result = self.pool.apply_async(self.env.reset) + else: + self.send_action(action) + + def recv(self): + # for tianshou >= 0.4.6 + return self.result.get() + @staticmethod def wait(*args, **kwargs): raise NotImplementedError('Async collect is not supported yet.') From c447249c9f4a160608b1916a8b6b7529ff0b8899 Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Tue, 1 Mar 2022 11:31:33 +0800 Subject: [PATCH 06/14] Support loading from `state_dict` of supernet (#4544) --- nni/retiarii/codegen/pytorch.py | 24 ++++- nni/retiarii/nn/pytorch/api.py | 18 +++- nni/retiarii/nn/pytorch/component.py | 12 ++- nni/retiarii/nn/pytorch/nasbench101.py | 2 + nni/retiarii/utils.py | 118 +++++++++++++++++++++ test/ut/retiarii/debug_mnist_pytorch.py | 2 + test/ut/retiarii/test_convert.py | 16 +-- test/ut/retiarii/test_convert_basic.py | 18 +--- test/ut/retiarii/test_convert_models.py | 19 +--- test/ut/retiarii/test_convert_operators.py | 18 +--- test/ut/retiarii/test_convert_pytorch.py | 27 ++--- test/ut/retiarii/test_highlevel_apis.py | 83 ++++++++++++++- 12 files changed, 280 insertions(+), 77 deletions(-) diff --git a/nni/retiarii/codegen/pytorch.py b/nni/retiarii/codegen/pytorch.py index f8a1f46c48..555c5ea1c4 100644 --- a/nni/retiarii/codegen/pytorch.py +++ b/nni/retiarii/codegen/pytorch.py @@ -6,6 +6,7 @@ from typing import Dict, List, Tuple, Any from nni.retiarii.operation_def.torch_op_def import ToDevice +from nni.retiarii.utils import STATE_DICT_PY_MAPPING from nni.common.device import Device, GPUDevice from ..graph import IllegalGraphError, Edge, Graph, Node, Model @@ -97,7 +98,18 @@ def _format_variable_name(name: str, graph_name: str) -> str: name = name.replace('/', '__') # https://stackoverflow.com/questions/3303312/how-do-i-convert-a-string-to-a-valid-variable-name-in-python - return re.sub('\W|^(?=\d)','_', name) + name = re.sub('\W|^(?=\d)','_', name) + + if name.startswith('__') and (len(name) > 2 and 
name[2] != '_'): + # name can't start with double underscore + # it's reserved in Python: https://stackoverflow.com/a/1301409/6837658 + # but it's actually very common in our generated code + name = name[1:] + elif name.startswith('_'): + # to avoid conflicts between '_' and '__' + name = 'i' + name + + return name def generate_cuda_mapping(placement: Dict[Node, Device]) -> Dict[Device, int]: @@ -125,6 +137,7 @@ def graph_to_pytorch_model(graph_name: str, graph: Graph, placement=None) -> str # only need to generate code for module here import_pkgs = set() node_codes = [] + node_python_mappings = {} cuda_remapped_id = None if placement: cuda_remapped_id = generate_cuda_mapping(placement) @@ -138,7 +151,9 @@ def graph_to_pytorch_model(graph_name: str, graph: Graph, placement=None) -> str pkg_name = node.operation.get_import_pkg() if pkg_name is not None: import_pkgs.add(pkg_name) - node_code = node.operation.to_init_code(_format_variable_name(node.name, graph_name)) + + py_variable_name = _format_variable_name(node.name, graph_name) + node_code = node.operation.to_init_code(py_variable_name) if node_code is not None: if placement and node in placement and len(node_code) > 0: if isinstance(placement[node], GPUDevice): @@ -149,6 +164,11 @@ def graph_to_pytorch_model(graph_name: str, graph: Graph, placement=None) -> str else: node_codes.append(node_code) + # Map to module hierarchies in original search space python code + node_python_mappings[py_variable_name] = node.python_name + + node_codes.append(f'self.{STATE_DICT_PY_MAPPING} = {node_python_mappings}') + if graph.input_node.operation.io_names is None: input_code = '*_inputs' else: diff --git a/nni/retiarii/nn/pytorch/api.py b/nni/retiarii/nn/pytorch/api.py index 8c78e253e3..f711200559 100644 --- a/nni/retiarii/nn/pytorch/api.py +++ b/nni/retiarii/nn/pytorch/api.py @@ -11,6 +11,7 @@ from nni.common.serializer import Translatable from nni.retiarii.serializer import basic_unit +from nni.retiarii.utils import STATE_DICT_PY_MAPPING_PARTIAL from .utils import Mutable, generate_new_label, get_fixed_value @@ -65,9 +66,22 @@ def create_fixed_module(cls, candidates: Union[Dict[str, nn.Module], List[nn.Mod label: Optional[str] = None, **kwargs): chosen = get_fixed_value(label) if isinstance(candidates, list): - return candidates[int(chosen)] + result = candidates[int(chosen)] else: - return candidates[chosen] + result = candidates[chosen] + + # map the named hierarchies to support weight inheritance for python engine + if hasattr(result, STATE_DICT_PY_MAPPING_PARTIAL): + # handle cases where layer choices are nested + # already has a mapping, will merge with it + prev_mapping = getattr(result, STATE_DICT_PY_MAPPING_PARTIAL) + setattr(result, STATE_DICT_PY_MAPPING_PARTIAL, {k: f'{chosen}.{v}' for k, v in prev_mapping.items()}) + else: + # "result" needs to know where to map itself. + # Ideally, we should put a _mapping_ in the module where "result" is located, + # but it's impossible to put mapping into parent module here. 
+ setattr(result, STATE_DICT_PY_MAPPING_PARTIAL, {'__self__': str(chosen)}) + return result def __init__(self, candidates: Union[Dict[str, nn.Module], List[nn.Module]], *, prior: Optional[List[float]] = None, label: Optional[str] = None, **kwargs): diff --git a/nni/retiarii/nn/pytorch/component.py b/nni/retiarii/nn/pytorch/component.py index 7c681b0242..ae21a0d8e6 100644 --- a/nni/retiarii/nn/pytorch/component.py +++ b/nni/retiarii/nn/pytorch/component.py @@ -5,6 +5,8 @@ import torch import torch.nn as nn +from nni.retiarii.utils import STATE_DICT_PY_MAPPING_PARTIAL + from .api import LayerChoice from .cell import Cell from .nasbench101 import NasBench101Cell, NasBench101Mutator @@ -38,7 +40,15 @@ def create_fixed_module(cls, List[nn.Module]], depth: Union[int, Tuple[int, int]], *, label: Optional[str] = None): repeat = get_fixed_value(label) - return nn.Sequential(*cls._replicate_and_instantiate(blocks, repeat)) + result = nn.Sequential(*cls._replicate_and_instantiate(blocks, repeat)) + + if hasattr(result, STATE_DICT_PY_MAPPING_PARTIAL): + # already has a mapping, will merge with it + prev_mapping = getattr(result, STATE_DICT_PY_MAPPING_PARTIAL) + setattr(result, STATE_DICT_PY_MAPPING_PARTIAL, {k: f'blocks.{v}' for k, v in prev_mapping.items()}) + else: + setattr(result, STATE_DICT_PY_MAPPING_PARTIAL, {'__self__': 'blocks'}) + return result def __init__(self, blocks: Union[Callable[[int], nn.Module], diff --git a/nni/retiarii/nn/pytorch/nasbench101.py b/nni/retiarii/nn/pytorch/nasbench101.py index 5671af1f62..b08443aded 100644 --- a/nni/retiarii/nn/pytorch/nasbench101.py +++ b/nni/retiarii/nn/pytorch/nasbench101.py @@ -304,6 +304,8 @@ def make_list(x): return x if isinstance(x, list) else [x] [op_candidates[selected[f'{label}/op{i}']] for i in range(1, num_nodes - 1)], adjacency_list, in_features, out_features, num_nodes, projection) + # FIXME: weight inheritance on nasbench101 is not supported yet + def __init__(self, op_candidates: Union[Dict[str, Callable[[int], nn.Module]], List[Callable[[int], nn.Module]]], in_features: int, out_features: int, projection: Callable[[int, int], nn.Module], max_num_nodes: int = 7, max_num_edges: int = 9, label: Optional[str] = None): diff --git a/nni/retiarii/utils.py b/nni/retiarii/utils.py index 425d707ec1..1bf7cfdfd8 100644 --- a/nni/retiarii/utils.py +++ b/nni/retiarii/utils.py @@ -2,8 +2,10 @@ # Licensed under the MIT license. import inspect +import itertools import warnings from collections import defaultdict +from contextlib import contextmanager from typing import Any, List, Dict from pathlib import Path @@ -154,3 +156,119 @@ def _simple_name(key: str, lst: List[Any]) -> str: def get_current_context(key: str) -> Any: return ContextStack.top(key) + + +# map variables to prefix in the state dict +# e.g., {'upsample': 'mynet.module.deconv2.upsample_layer'} +STATE_DICT_PY_MAPPING = '_mapping_' + +# map variables to `prefix`.`value` in the state dict +# e.g., {'upsample': 'choice3.upsample_layer'}, +# which actually means {'upsample': 'mynet.module.choice3.upsample_layer'}, +# and 'upsample' is also in `mynet.module`. +STATE_DICT_PY_MAPPING_PARTIAL = '_mapping_partial_' + + +@contextmanager +def original_state_dict_hooks(model: Any): + """ + Use this patch if you want to save/load state dict in the original state dict hierarchy. 
+ + For example, when you already have a state dict for the base model / search space (which often + happens when you have trained a supernet with one-shot strategies), the state dict isn't organized + in the same way as when a sub-model is sampled from the search space. This patch will help + the modules in the sub-model find the corresponding module in the base model. + + The code looks like, + + .. code-block:: python + + with original_state_dict_hooks(model): + model.load_state_dict(state_dict_from_supernet, strict=False) # supernet has extra keys + + Or vice-versa, + + .. code-block:: python + + with original_state_dict_hooks(model): + supernet_style_state_dict = model.state_dict() + """ + + import torch.nn as nn + assert isinstance(model, nn.Module), 'PyTorch is the only supported framework for now.' + + # the following are written for pytorch only + + # first get the full mapping + full_mapping = {} + + def full_mapping_in_module(src_prefix, tar_prefix, module): + if hasattr(module, STATE_DICT_PY_MAPPING): + # only values are complete + local_map = getattr(module, STATE_DICT_PY_MAPPING) + elif hasattr(module, STATE_DICT_PY_MAPPING_PARTIAL): + # keys and values are both incomplete + local_map = getattr(module, STATE_DICT_PY_MAPPING_PARTIAL) + local_map = {k: tar_prefix + v for k, v in local_map.items()} + else: + # no mapping + local_map = {} + + if '__self__' in local_map: + # special case, overwrite prefix + tar_prefix = local_map['__self__'] + '.' + + for key, value in local_map.items(): + if key != '' and key not in module._modules: # not a sub-module, probably a parameter + full_mapping[src_prefix + key] = value + + if src_prefix != tar_prefix: # To deal with leaf nodes. + for name, value in itertools.chain(module._parameters.items(), module._buffers.items()): # direct children + if value is None or name in module._non_persistent_buffers_set: + # it won't appear in state dict + continue + if (src_prefix + name) not in full_mapping: + full_mapping[src_prefix + name] = tar_prefix + name + + for name, child in module.named_children(): + # sub-modules + full_mapping_in_module( + src_prefix + name + '.', + local_map.get(name, tar_prefix + name) + '.', # if mapping doesn't exist, respect the prefix + child + ) + + full_mapping_in_module('', '', model) + + def load_state_dict_hook(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): + reverse_mapping = defaultdict(list) + for src, tar in full_mapping.items(): + reverse_mapping[tar].append(src) + + transf_state_dict = {} + for src, tar_keys in reverse_mapping.items(): + if src in state_dict: + value = state_dict.pop(src) + for tar in tar_keys: + transf_state_dict[tar] = value + else: + missing_keys.append(src) + state_dict.update(transf_state_dict) + + def state_dict_hook(module, destination, prefix, local_metadata): + result = {} + for src, tar in full_mapping.items(): + if src in destination: + result[tar] = destination.pop(src) + else: + raise KeyError(f'"{src}" not in state dict, but found in mapping.') + destination.update(result) + + try: + hooks = [] + hooks.append(model._register_load_state_dict_pre_hook(load_state_dict_hook)) + hooks.append(model._register_state_dict_hook(state_dict_hook)) + yield + finally: + for hook in hooks: + hook.remove() diff --git a/test/ut/retiarii/debug_mnist_pytorch.py b/test/ut/retiarii/debug_mnist_pytorch.py index c7511a5dbd..80d7355a0a 100644 --- a/test/ut/retiarii/debug_mnist_pytorch.py +++ b/test/ut/retiarii/debug_mnist_pytorch.py @@ -16,6 +16,7 @@ def 
__init__(self): self.fc1 = torch.nn.Linear(out_features=256, in_features=1024) self.fc2 = torch.nn.Linear(out_features=10, in_features=256) self.softmax = torch.nn.Softmax() + self._mapping_ = {'stem': None, 'flatten': None, 'fc1': None, 'fc2': None, 'softmax': None} def forward(self, image): stem = self.stem(image) @@ -34,6 +35,7 @@ def __init__(self): self.pool1 = torch.nn.MaxPool2d(kernel_size=2) self.conv2 = torch.nn.Conv2d(out_channels=64, in_channels=32, kernel_size=5) self.pool2 = torch.nn.MaxPool2d(kernel_size=2) + self._mapping_ = {'conv1': None, 'pool1': None, 'conv2': None, 'pool2': None} def forward(self, *_inputs): conv1 = self.conv1(_inputs[0]) diff --git a/test/ut/retiarii/test_convert.py b/test/ut/retiarii/test_convert.py index c79c7696af..15b3bba00b 100644 --- a/test/ut/retiarii/test_convert.py +++ b/test/ut/retiarii/test_convert.py @@ -14,6 +14,7 @@ import nni.retiarii.nn.pytorch as nn from nni.retiarii import basic_unit from nni.retiarii.codegen import model_to_pytorch_script +from nni.retiarii.utils import original_state_dict_hooks from .convert_mixin import ConvertMixin, ConvertWithShapeMixin @@ -50,16 +51,6 @@ def forward(self, input): return out.view(size[0], size[1], -1) class TestConvert(unittest.TestCase, ConvertMixin): - @staticmethod - def _match_state_dict(current_values, expected_format): - result = {} - for k, v in expected_format.items(): - for idx, cv in enumerate(current_values): - if cv.shape == v.shape: - result[k] = cv - current_values.pop(idx) - break - return result def checkExportImport(self, model, input): model_ir = self._convert_model(model, input) @@ -68,9 +59,8 @@ def checkExportImport(self, model, input): exec_vars = {} exec(model_code + '\n\nconverted_model = _model()', exec_vars) converted_model = exec_vars['converted_model'] - converted_state_dict = self._match_state_dict(list(model.state_dict().values()), - dict(converted_model.state_dict())) - converted_model.load_state_dict(converted_state_dict) + with original_state_dict_hooks(converted_model): + converted_model.load_state_dict(dict(model.state_dict())) with torch.no_grad(): expected_output = model.eval()(*input) converted_output = converted_model.eval()(*input) diff --git a/test/ut/retiarii/test_convert_basic.py b/test/ut/retiarii/test_convert_basic.py index 145f62f636..48f0ba1833 100644 --- a/test/ut/retiarii/test_convert_basic.py +++ b/test/ut/retiarii/test_convert_basic.py @@ -12,20 +12,11 @@ from .convert_mixin import ConvertMixin, ConvertWithShapeMixin from nni.retiarii.codegen import model_to_pytorch_script +from nni.retiarii.utils import original_state_dict_hooks # following pytorch v1.7.1 class TestConvert(unittest.TestCase, ConvertMixin): - @staticmethod - def _match_state_dict(current_values, expected_format): - result = {} - for k, v in expected_format.items(): - for idx, cv in enumerate(current_values): - if cv.shape == v.shape: - result[k] = cv - current_values.pop(idx) - break - return result def checkExportImport(self, model, input, check_value=True): model_ir = self._convert_model(model, input) @@ -35,9 +26,10 @@ def checkExportImport(self, model, input, check_value=True): exec_vars = {} exec(model_code + '\n\nconverted_model = _model()', exec_vars) converted_model = exec_vars['converted_model'] - converted_state_dict = self._match_state_dict(list(model.state_dict().values()), - dict(converted_model.state_dict())) - converted_model.load_state_dict(converted_state_dict) + + with original_state_dict_hooks(converted_model): + 
converted_model.load_state_dict(model.state_dict()) + with torch.no_grad(): expected_output = model.eval()(*input) converted_output = converted_model.eval()(*input) diff --git a/test/ut/retiarii/test_convert_models.py b/test/ut/retiarii/test_convert_models.py index 26851d1e0b..8dba5bbaef 100644 --- a/test/ut/retiarii/test_convert_models.py +++ b/test/ut/retiarii/test_convert_models.py @@ -9,23 +9,13 @@ import torchvision import nni.retiarii.nn.pytorch as nn -from nni.retiarii import serialize from nni.retiarii.codegen import model_to_pytorch_script +from nni.retiarii.utils import original_state_dict_hooks from .convert_mixin import ConvertMixin, ConvertWithShapeMixin class TestModels(unittest.TestCase, ConvertMixin): - @staticmethod - def _match_state_dict(current_values, expected_format): - result = {} - for k, v in expected_format.items(): - for idx, cv in enumerate(current_values): - if cv.shape == v.shape: - result[k] = cv - current_values.pop(idx) - break - return result def run_test(self, model, input, check_value=True): model_ir = self._convert_model(model, input) @@ -35,9 +25,10 @@ def run_test(self, model, input, check_value=True): exec_vars = {} exec(model_code + '\n\nconverted_model = _model()', exec_vars) converted_model = exec_vars['converted_model'] - converted_state_dict = self._match_state_dict(list(model.state_dict().values()), - dict(converted_model.state_dict())) - converted_model.load_state_dict(converted_state_dict) + + with original_state_dict_hooks(converted_model): + converted_model.load_state_dict(model.state_dict()) + with torch.no_grad(): expected_output = model.eval()(*input) converted_output = converted_model.eval()(*input) diff --git a/test/ut/retiarii/test_convert_operators.py b/test/ut/retiarii/test_convert_operators.py index 2bc24fff64..73f607973f 100644 --- a/test/ut/retiarii/test_convert_operators.py +++ b/test/ut/retiarii/test_convert_operators.py @@ -16,6 +16,7 @@ import nni.retiarii.nn.pytorch as nn from nni.retiarii.codegen import model_to_pytorch_script +from nni.retiarii.utils import original_state_dict_hooks from .convert_mixin import ConvertMixin, ConvertWithShapeMixin @@ -23,16 +24,6 @@ class TestOperators(unittest.TestCase, ConvertMixin): - @staticmethod - def _match_state_dict(current_values, expected_format): - result = {} - for k, v in expected_format.items(): - for idx, cv in enumerate(current_values): - if cv.shape == v.shape: - result[k] = cv - current_values.pop(idx) - break - return result def checkExportImport(self, model, input, check_value=True): model_ir = self._convert_model(model, input) @@ -42,9 +33,10 @@ def checkExportImport(self, model, input, check_value=True): exec_vars = {} exec(model_code + '\n\nconverted_model = _model()', exec_vars) converted_model = exec_vars['converted_model'] - converted_state_dict = self._match_state_dict(list(model.state_dict().values()), - dict(converted_model.state_dict())) - converted_model.load_state_dict(converted_state_dict) + + with original_state_dict_hooks(converted_model): + converted_model.load_state_dict(model.state_dict()) + with torch.no_grad(): expected_output = model.eval()(*input) converted_output = converted_model.eval()(*input) diff --git a/test/ut/retiarii/test_convert_pytorch.py b/test/ut/retiarii/test_convert_pytorch.py index 692910642b..6036cde485 100644 --- a/test/ut/retiarii/test_convert_pytorch.py +++ b/test/ut/retiarii/test_convert_pytorch.py @@ -14,28 +14,17 @@ import torchvision import nni.retiarii.nn.pytorch as nn -from nni.retiarii import serialize from 
nni.retiarii.codegen import model_to_pytorch_script +from nni.retiarii.utils import original_state_dict_hooks from .convert_mixin import ConvertMixin, ConvertWithShapeMixin class TestPytorch(unittest.TestCase, ConvertMixin): - @staticmethod - def _match_state_dict(current_values, expected_format): - result = {} - for k, v in expected_format.items(): - for idx, cv in enumerate(current_values): - if cv.shape == v.shape: - result[k] = cv - current_values.pop(idx) - break - return result - - def run_test(self, model, input, check_value=True): + + def run_test(self, model, input, check_value=True, strict_load=True): model_ir = self._convert_model(model, input) model_code = model_to_pytorch_script(model_ir) - print(model_code) from .inject_nn import remove_inject_pytorch_nn remove_inject_pytorch_nn() @@ -43,9 +32,10 @@ def run_test(self, model, input, check_value=True): exec_vars = {} exec(model_code + '\n\nconverted_model = _model()', exec_vars) converted_model = exec_vars['converted_model'] - converted_state_dict = self._match_state_dict(list(model.state_dict().values()), - dict(converted_model.state_dict())) - converted_model.load_state_dict(converted_state_dict) + + with original_state_dict_hooks(converted_model): + converted_model.load_state_dict(model.state_dict(), strict=strict_load) + with torch.no_grad(): expected_output = model.eval()(*input) converted_output = converted_model.eval()(*input) @@ -76,7 +66,8 @@ def forward(self, input): model = LargeModel() x = torch.tensor([2], dtype=torch.long) - self.run_test(model, (x, )) + # emb and lin1 is actually not used so they won't appear in generated model + self.run_test(model, (x, ), strict_load=False) @unittest.skip('skip for now, as it needs inject_nn') def test_mobilenet_v2_with_external_data(self): diff --git a/test/ut/retiarii/test_highlevel_apis.py b/test/ut/retiarii/test_highlevel_apis.py index 9e8b79fc49..6f3b6b0c7d 100644 --- a/test/ut/retiarii/test_highlevel_apis.py +++ b/test/ut/retiarii/test_highlevel_apis.py @@ -17,7 +17,7 @@ from nni.retiarii.nn.pytorch.api import ValueChoice from nni.retiarii.nn.pytorch.mutator import process_evaluator_mutations, process_inline_mutation, extract_mutation_from_pt_module from nni.retiarii.serializer import model_wrapper -from nni.retiarii.utils import ContextStack +from nni.retiarii.utils import ContextStack, original_state_dict_hooks class EnumerateSampler(Sampler): @@ -123,6 +123,29 @@ def forward(self, x): self.assertEqual(self._get_converted_pytorch_model(model_new)(torch.randn(1, 3, 3, 3)).size(), torch.Size([1, i, 3, 3])) + def test_layer_choice_weight_inheritance(self): + @model_wrapper + class Net(nn.Module): + def __init__(self): + super().__init__() + self.module = nn.LayerChoice([nn.Conv2d(3, i, kernel_size=1) for i in range(1, 11)]) + + def forward(self, x): + return self.module(x) + + orig_model = Net() + model, mutators = self._get_model_with_mutators(orig_model) + mutator = mutators[0].bind_sampler(EnumerateSampler()) + for i in range(1, 11): + model_new = mutator.apply(model) + model_new = self._get_converted_pytorch_model(model_new) + with original_state_dict_hooks(model_new): + model_new.load_state_dict(orig_model.state_dict(), strict=False) + inp = torch.randn(1, 3, 3, 3) + a = getattr(orig_model.module, str(i - 1))(inp) + b = model_new(inp) + self.assertLess((a - b).abs().max().item(), 1E-4) + def test_nested_layer_choice(self): @model_wrapper class Net(nn.Module): @@ -150,6 +173,40 @@ def forward(self, x): 
self.assertEqual(self._get_converted_pytorch_model(mutators[1].apply(mutators[0].apply(model)))(input).size(), torch.Size([1, 5, 5, 5])) + def test_nested_layer_choice_weight_inheritance(self): + @model_wrapper + class Net(nn.Module): + def __init__(self): + super().__init__() + self.module = nn.LayerChoice([ + nn.LayerChoice([nn.Conv2d(3, 3, kernel_size=1), + nn.Conv2d(3, 4, kernel_size=1), + nn.Conv2d(3, 5, kernel_size=1)]), + nn.Conv2d(3, 1, kernel_size=1) + ]) + + def forward(self, x): + return self.module(x) + + orig_model = Net() + model, mutators = self._get_model_with_mutators(orig_model) + mutators[0].bind_sampler(EnumerateSampler()) + mutators[1].bind_sampler(EnumerateSampler()) + input = torch.randn(1, 3, 5, 5) + + for i in range(3): + model_new = self._get_converted_pytorch_model(mutators[1].apply(mutators[0].apply(model))) + with original_state_dict_hooks(model_new): + model_new.load_state_dict(orig_model.state_dict(), strict=False) + if i == 0: + a = getattr(getattr(orig_model.module, '0'), '0')(input) + elif i == 1: + a = getattr(orig_model.module, '1')(input) + elif i == 2: + a = getattr(getattr(orig_model.module, '0'), '2')(input) + b = model_new(input) + self.assertLess((a - b).abs().max().item(), 1E-4) + def test_input_choice(self): @model_wrapper class Net(nn.Module): @@ -578,6 +635,30 @@ def forward(self, x): self.assertIn(1., result) + def test_repeat_weight_inheritance(self): + @model_wrapper + class Net(nn.Module): + def __init__(self): + super().__init__() + self.module = nn.Repeat(lambda index: nn.Conv2d(3, 3, 1), (2, 5)) + + def forward(self, x): + return self.module(x) + + orig_model = Net() + model, mutators = self._get_model_with_mutators(orig_model) + mutator = mutators[0].bind_sampler(EnumerateSampler()) + inp = torch.randn(1, 3, 5, 5) + + for i in range(4): + model_new = self._get_converted_pytorch_model(mutator.apply(model)) + with original_state_dict_hooks(model_new): + model_new.load_state_dict(orig_model.state_dict(), strict=False) + + a = nn.Sequential(*orig_model.module.blocks[:i + 2])(inp) + b = model_new(inp) + self.assertLess((a - b).abs().max().item(), 1E-4) + def test_cell(self): @model_wrapper class Net(nn.Module): From 21abc280257fb8868be61264abe42534aa09188b Mon Sep 17 00:00:00 2001 From: Yuge Zhang Date: Thu, 3 Mar 2022 17:16:05 +0800 Subject: [PATCH 07/14] Fix #4434: support pickle in serializer (#4552) --- .../v2/pytorch/utils/constructor_helper.py | 12 +- nni/common/serializer.py | 203 +++++++++++++++--- .../evaluator/pytorch/cgo/evaluator.py | 1 + nni/retiarii/serializer.py | 59 +++-- test/ut/retiarii/test_cgo_engine.py | 1 + test/ut/sdk/test_serializer.py | 93 +++++++- 6 files changed, 307 insertions(+), 62 deletions(-) diff --git a/nni/algorithms/compression/v2/pytorch/utils/constructor_helper.py b/nni/algorithms/compression/v2/pytorch/utils/constructor_helper.py index 6e2cdd1b75..c5672b506e 100644 --- a/nni/algorithms/compression/v2/pytorch/utils/constructor_helper.py +++ b/nni/algorithms/compression/v2/pytorch/utils/constructor_helper.py @@ -10,7 +10,7 @@ from torch.optim.lr_scheduler import _LRScheduler from nni.common.serializer import _trace_cls -from nni.common.serializer import Traceable +from nni.common.serializer import Traceable, is_traceable __all__ = ['OptimizerConstructHelper', 'LRSchedulerConstructHelper'] @@ -80,14 +80,14 @@ def call(self, wrapped_model: Module, origin2wrapped_name_map: Dict) -> Optimize @staticmethod def from_trace(model: Module, optimizer_trace: Traceable): - assert isinstance(optimizer_trace, 
Traceable), \
+        assert is_traceable(optimizer_trace), \
             'Please use nni.trace to wrap the optimizer class before initialize the optimizer.'
         assert isinstance(optimizer_trace, Optimizer), \
             'It is not an instance of torch.nn.Optimizer.'
         return OptimizerConstructHelper(model,
-                                        optimizer_trace._get_nni_attr('symbol'),
-                                        *optimizer_trace._get_nni_attr('args'),
-                                        **optimizer_trace._get_nni_attr('kwargs'))
+                                        optimizer_trace.trace_symbol,
+                                        *optimizer_trace.trace_args,
+                                        **optimizer_trace.trace_kwargs)
 
 
 class LRSchedulerConstructHelper(ConstructHelper):
@@ -112,7 +112,7 @@ def call(self, optimizer: Optimizer) -> _LRScheduler:
 
     @staticmethod
     def from_trace(lr_scheduler_trace: Traceable):
-        assert isinstance(lr_scheduler_trace, Traceable), \
+        assert is_traceable(lr_scheduler_trace), \
             'Please use nni.trace to wrap the lr scheduler class before initialize the scheduler.'
         assert isinstance(lr_scheduler_trace, _LRScheduler), \
             'It is not an instance of torch.nn.lr_scheduler._LRScheduler.'
diff --git a/nni/common/serializer.py b/nni/common/serializer.py
index 41d73f8192..bdc64e4598 100644
--- a/nni/common/serializer.py
+++ b/nni/common/serializer.py
@@ -5,6 +5,7 @@
 import functools
 import inspect
 import numbers
+import sys
 import types
 import warnings
 from io import IOBase
@@ -13,7 +14,7 @@
 import cloudpickle  # use cloudpickle as backend for unserializable types and instances
 import json_tricks  # use json_tricks as serializer backend
 
-__all__ = ['trace', 'dump', 'load', 'PayloadTooLarge', 'Translatable', 'Traceable', 'is_traceable']
+__all__ = ['trace', 'dump', 'load', 'PayloadTooLarge', 'Translatable', 'Traceable', 'is_traceable', 'is_wrapped_with_trace']
 
 T = TypeVar('T')
 
@@ -23,46 +24,43 @@ class PayloadTooLarge(Exception):
     pass
 
 
-class Traceable(abc.ABC):
+class Traceable:
     """
     A traceable object have copy and dict.
     Copy and mutate are used to copy the object for further mutations.
     Dict returns a TraceDictType to enable serialization.
     """
-    @abc.abstractmethod
+
     def trace_copy(self) -> 'Traceable':
         """
         Perform a shallow copy.
         NOTE: NONE of the attributes will be preserved.
         This is the one that should be used when you want to "mutate" a serializable object.
         """
-        ...
+        raise NotImplementedError()
 
     @property
-    @abc.abstractmethod
     def trace_symbol(self) -> Any:
         """
         Symbol object. Could be a class or a function.
         ``get_hybrid_cls_or_func_name`` and ``import_cls_or_func_from_hybrid_name`` is a pair to
         convert the symbol into a string and convert the string back to symbol.
         """
-        ...
+        raise NotImplementedError()
 
     @property
-    @abc.abstractmethod
     def trace_args(self) -> List[Any]:
         """
         List of positional arguments passed to symbol. Usually empty if ``kw_only`` is true,
         in which case all the positional arguments are converted into keyword arguments.
         """
-        ...
+        raise NotImplementedError()
 
     @property
-    @abc.abstractmethod
     def trace_kwargs(self) -> Dict[str, Any]:
         """
         Dict of keyword arguments.
         """
-        ...
+        raise NotImplementedError()
 
 
 class Translatable(abc.ABC):
@@ -84,13 +82,27 @@ def _translate_argument(d: Any) -> Any:
 
 def is_traceable(obj: Any) -> bool:
     """
-    Check whether an object is a traceable instance (not type).
+    Check whether an object is a traceable instance or type.
+
+    Note that an object being traceable only means that it implements the "Traceable" interface,
+    and the properties have been implemented. It doesn't necessarily mean that its type is wrapped with trace,
+    because the properties could be added **after** the instance has been created.
    """
    return hasattr(obj, 'trace_copy') and \
        hasattr(obj, 'trace_symbol') and \
        hasattr(obj, 'trace_args') and \
-        hasattr(obj, 'trace_kwargs') and \
-        not inspect.isclass(obj)
+        hasattr(obj, 'trace_kwargs')
+
+
+def is_wrapped_with_trace(cls_or_func: Any) -> bool:
+    """
+    Check whether a function or class is already wrapped with ``@nni.trace``.
+    If a class or function is already wrapped with trace, then the created object must be "traceable".
+    """
+    return getattr(cls_or_func, '_traced', False) and (
+        not hasattr(cls_or_func, '__dict__') or  # in case it's a function
+        '_traced' in cls_or_func.__dict__  # must be in this class, super-class traced doesn't count
+    )
 
 
 class SerializableObject(Traceable):
@@ -160,6 +172,15 @@ def __repr__(self):
 
 def inject_trace_info(obj: Any, symbol: T, args: List[Any], kwargs: Dict[str, Any]) -> Any:
     # If an object is already created, this can be a fix so that the necessary info are re-injected into the object.
+    # Make obj comply with the interface of traceable, though we cannot change its base class.
+    obj.__dict__.update(_nni_symbol=symbol, _nni_args=args, _nni_kwargs=kwargs)
+
+    return obj
+
+
+def _make_class_traceable(cls: T, create_wrapper: bool = False) -> T:
+    # Make an already existing class traceable, without creating a new class.
+    # Should be used together with `inject_trace_info`.
 
     def getter_factory(x):
         return lambda self: self.__dict__['_nni_' + x]
@@ -184,20 +205,18 @@ def trace_copy(self):
         'trace_copy': trace_copy
     }
 
-    if hasattr(obj, '__class__') and hasattr(obj, '__dict__'):
+    if not create_wrapper:
         for name, method in attributes.items():
-            setattr(obj.__class__, name, method)
+            setattr(cls, name, method)
+        return cls
     else:
-        wrapper = type('wrapper', (Traceable, type(obj)), attributes)
-        obj = wrapper(obj)  # pylint: disable=abstract-class-instantiated
-
-    # make obj complying with the interface of traceable, though we cannot change its base class
-    obj.__dict__.update(_nni_symbol=symbol, _nni_args=args, _nni_kwargs=kwargs)
-
-    return obj
+        # sometimes create_wrapper is mandatory, e.g., for built-in types like list/int.
+        # but I don't want to check here because it's unreliable.
+        wrapper = type('wrapper', (Traceable, cls), attributes)
+        return wrapper
 
 
-def trace(cls_or_func: T = None, *, kw_only: bool = True) -> Union[T, Traceable]:
+def trace(cls_or_func: T = None, *, kw_only: bool = True, inheritable: bool = False) -> Union[T, Traceable]:
     """
     Annotate a function or a class if you want to preserve where it comes from.
     This is usually used in the following scenarios:
@@ -221,6 +240,9 @@ def trace(cls_or_func: T = None, *, kw_only: bool = True) -> Union[T, Traceable]
     list and types. This can be useful to extract semantics, but can be tricky in some corner cases.
     Therefore, in some cases, some positional arguments will still be kept.
 
+    If ``inheritable`` is true, the trace information from superclass will also be available in subclass.
+    This, however, will make the subclass un-trace-able. Note that this argument has no effect when tracing functions.
+
     .. warning::
 
         Generators will be first expanded into a list, and the resulting list will be further passed into the wrapped function/class.
@@ -237,10 +259,10 @@ def foo(bar):
 
     def wrap(cls_or_func):
         # already annotated, do nothing
-        if getattr(cls_or_func, '_traced', False):
+        if is_wrapped_with_trace(cls_or_func):
             return cls_or_func
         if isinstance(cls_or_func, type):
-            cls_or_func = _trace_cls(cls_or_func, kw_only)
+            cls_or_func = _trace_cls(cls_or_func, kw_only, inheritable=inheritable)
         elif _is_function(cls_or_func):
             cls_or_func = _trace_func(cls_or_func, kw_only)
         else:
@@ -353,11 +375,60 @@ def load(string: Optional[str] = None, *, fp: Optional[Any] = None, ignore_comme
     return json_tricks.load(fp, obj_pairs_hooks=hooks, **json_tricks_kwargs)
 
 
-def _trace_cls(base, kw_only, call_super=True):
+def _trace_cls(base, kw_only, call_super=True, inheritable=False):
     # the implementation to trace a class is to store a copy of init arguments
     # this won't support class that defines a customized new but should work for most cases
-    class wrapper(SerializableObject, base):
+    if sys.platform != 'linux':
+        if not call_super:
+            raise ValueError("'call_super' is mandatory to be set true on non-linux platform")
+
+        try:
+            # In non-linux envs, dynamically creating new classes doesn't work with pickle.
+            # We have to replace the ``__init__`` with a new ``__init__``.
+            # This, however, causes side-effects where the replacement is not intended.
+            # This also doesn't work with built-in types (e.g., OrderedDict), and the replacement
+            # won't be effective any more if ``nni.trace`` is called in-place (e.g., ``nni.trace(nn.Conv2d)(...)``).
+            original_init = base.__init__
+
+            # Makes the new init have the exact same signature as the old one,
+            # so as to make pytorch-lightning happy.
+            # https://github.com/PyTorchLightning/pytorch-lightning/blob/4cc05b2cf98e49168a5f5dc265647d75d1d3aae9/pytorch_lightning/utilities/parsing.py#L143
+            @functools.wraps(original_init)
+            def new_init(self, *args, **kwargs):
+                args, kwargs = _formulate_arguments(original_init, args, kwargs, kw_only, is_class_init=True)
+                original_init(
+                    self,
+                    *[_argument_processor(arg) for arg in args],
+                    **{kw: _argument_processor(arg) for kw, arg in kwargs.items()}
+                )
+                inject_trace_info(self, base, args, kwargs)
+
+            base.__init__ = new_init
+
+            base = _make_class_traceable(base)
+            return base
+
+        except TypeError:
+            warnings.warn("In-place __init__ replacement failed in `@nni.trace`, probably because the type is a built-in/extension type, "
+                          "and its __init__ can't be replaced. `@nni.trace` is now falling back to the 'inheritance' approach. "
+                          "However, this could cause issues when using pickle. See https://github.com/microsoft/nni/issues/4434",
+                          RuntimeWarning)
+
+    # This is trying to solve the case where superclass and subclass are both decorated with @nni.trace.
+    # We use a metaclass to "unwrap" the superclass.
+    # However, this doesn't work if:
+    # 1. Base class already has a customized metaclass. We will raise an error in that case.
+    # 2. SerializableObject in ancestor (instead of parent). I think this case is rare and I didn't handle this case yet. FIXME
+    if type(base) is type and not inheritable:
+        metaclass = _unwrap_metaclass
+    else:
+        metaclass = type
+        if SerializableObject in inspect.getmro(base):
+            raise TypeError(f"{base} has a superclass already decorated with trace, and it's using a customized metaclass {type(base)}. "
+                            "Please either use the default metaclass, or remove trace from the super-class.")
+
+    class wrapper(SerializableObject, base, metaclass=metaclass):
         def __init__(self, *args, **kwargs):
             # store a copy of initial parameters
             args, kwargs = _formulate_arguments(base.__init__, args, kwargs, kw_only, is_class_init=True)
@@ -365,6 +436,32 @@ def __init__(self, *args, **kwargs):
             # calling serializable object init to initialize the full object
             super().__init__(symbol=base, args=args, kwargs=kwargs, call_super=call_super)
 
+        def __reduce__(self):
+            # The issue that decorator and pickler don't play well together is well known.
+            # The workaround solution is to use a stand-in class (_pickling_object) which pretends to be the pickled object.
+            # We then put the original type, as well as args and kwargs in its `__new__` argument.
+            # I suspect that there could still be problems when things get complex,
+            # e.g., the wrapped class has a custom pickling (``__reduce__``) or `__new__`.
+            # But it can't be worse because the previous pickle doesn't work at all.
+            #
+            # Linked issue: https://github.com/microsoft/nni/issues/4434
+            # SO: https://stackoverflow.com/questions/52185507/pickle-and-decorated-classes-picklingerror-not-the-same-object
+
+            # Store the inner class. The wrapped class couldn't be properly pickled.
+            type_ = cloudpickle.dumps(type(self).__wrapped__)
+
+            # in case they have customized ``__getstate__``.
+            if hasattr(self, '__getstate__'):
+                obj_ = self.__getstate__()
+            else:
+                obj_ = self.__dict__
+
+            # Pickle can't handle type objects.
+            if '_nni_symbol' in obj_:
+                obj_['_nni_symbol'] = cloudpickle.dumps(obj_['_nni_symbol'])
+
+            return _pickling_object, (type_, kw_only, obj_)
+
     _copy_class_wrapper_attributes(base, wrapper)
 
     return wrapper
@@ -391,6 +488,8 @@ def wrapper(*args, **kwargs):
         elif hasattr(res, '__class__') and hasattr(res, '__dict__'):
             # is a class, inject interface directly
             # need to be done before primitive types because there could be inheritance here.
+            if not getattr(type(res), '_traced', False):
+                _make_class_traceable(type(res), False)  # in-place
             res = inject_trace_info(res, func, args, kwargs)
         elif isinstance(res, (collections.abc.Callable, types.ModuleType, IOBase)):
             raise TypeError(f'Try to add trace info to {res}, but functions and modules are not supported.')
@@ -400,6 +499,8 @@ def wrapper(*args, **kwargs):
             # will be directly captured by python json encoder
             # and thus not possible to restore the trace parameters after dump and reload.
             # this is a known limitation.
+            new_type = _make_class_traceable(type(res), True)
+            res = new_type(res)  # re-creating the object
             res = inject_trace_info(res, func, args, kwargs)
         else:
             raise TypeError(f'Try to add trace info to {res}, but the type "{type(res)}" is unknown. '
@@ -425,6 +526,48 @@ def _copy_class_wrapper_attributes(base, wrapper):
     wrapper.__wrapped__ = base
 
 
+class _unwrap_metaclass(type):
+    # When a subclass is created, it detects whether the super-class is already annotated with @nni.trace.
+    # If yes, it gets the ``__wrapped__`` inner class, so that it doesn't inherit SerializableObject twice.
+    # Note that this doesn't work when metaclass is already defined (such as ABCMeta). We give up in that case.
+
+    def __new__(cls, name, bases, dct):
+        bases = tuple([getattr(base, '__wrapped__', base) for base in bases])
+        return super().__new__(cls, name, bases, dct)
+
+    # Using a customized "bases" breaks default isinstance and issubclass.
+    # We recover this by overriding the subclass and isinstance behavior, which concerns the wrapped class only.
+    def __subclasscheck__(cls, subclass):
+        inner_cls = getattr(cls, '__wrapped__', cls)
+        return inner_cls in inspect.getmro(subclass)
+
+    def __instancecheck__(cls, instance):
+        inner_cls = getattr(cls, '__wrapped__', cls)
+        return inner_cls in inspect.getmro(type(instance))
+
+
+class _pickling_object:
+    # Need `cloudpickle.loads` on the callable because the callable is pickled with cloudpickle.
+    # Used in `_trace_cls`.
+
+    def __new__(cls, type_, kw_only, data):
+        type_ = cloudpickle.loads(type_)
+        # Restore the trace type
+        type_ = _trace_cls(type_, kw_only)
+
+        # restore type
+        if '_nni_symbol' in data:
+            data['_nni_symbol'] = cloudpickle.loads(data['_nni_symbol'])
+
+        # https://docs.python.org/3/library/pickle.html#pickling-class-instances
+        obj = type_.__new__(type_)
+        if hasattr(obj, '__setstate__'):
+            obj.__setstate__(data)
+        else:
+            obj.__dict__.update(data)
+        return obj
+
+
 def _argument_processor(arg):
     # 1) translate
     # handle cases like ValueChoice
@@ -533,7 +676,9 @@ def _import_cls_or_func_from_name(target: str) -> Any:
 
 def _strip_trace_type(traceable: Any) -> Any:
     if getattr(traceable, '_traced', False):
-        return traceable.__wrapped__
+        # sometimes, ``__wrapped__`` could be unavailable (e.g., with `inject_trace_info`)
+        # need to have a default value
+        return getattr(traceable, '__wrapped__', traceable)
     return traceable
 
 
@@ -598,7 +743,7 @@ def _json_tricks_serializable_object_encode(obj: Any, primitives: bool = False,
     # Encodes a serializable object instance to json.
 
     # do nothing to instance that is not a serializable object and do not use trace
-    if not use_trace or not is_traceable(obj):
+    if not (use_trace and hasattr(obj, '__class__') and is_traceable(type(obj))):
         return obj
 
     if isinstance(obj.trace_symbol, property):
diff --git a/nni/retiarii/evaluator/pytorch/cgo/evaluator.py b/nni/retiarii/evaluator/pytorch/cgo/evaluator.py
index 967ad6dbaa..d0c2d8dce7 100644
--- a/nni/retiarii/evaluator/pytorch/cgo/evaluator.py
+++ b/nni/retiarii/evaluator/pytorch/cgo/evaluator.py
@@ -101,6 +101,7 @@ def _get_validation_metrics(self):
         return {name: self.trainer.callback_metrics['val_' + name].item() for name in self.metrics}
 
 
+@nni.trace
 class MultiModelSupervisedLearningModule(_MultiModelSupervisedLearningModule):
     """
     Lightning Module of SupervisedLearning for Cross-Graph Optimization.
diff --git a/nni/retiarii/serializer.py b/nni/retiarii/serializer.py
index 933475628d..58d38d97ff 100644
--- a/nni/retiarii/serializer.py
+++ b/nni/retiarii/serializer.py
@@ -5,7 +5,7 @@
 import warnings
 from typing import Any, TypeVar, Union
 
-from nni.common.serializer import Traceable, is_traceable, trace, _copy_class_wrapper_attributes
+from nni.common.serializer import Traceable, is_traceable, is_wrapped_with_trace, trace, _copy_class_wrapper_attributes
 from .utils import ModelNamespace
 
 __all__ = ['get_init_parameters_or_fail', 'serialize', 'serialize_cls', 'basic_unit', 'model_wrapper',
@@ -64,7 +64,8 @@ def basic_unit(cls: T, basic_unit_tag: bool = True) -> Union[T, Traceable]:
         class PrimitiveOp(nn.Module):
             ...
     """
-    _check_wrapped(cls)
+    if _check_wrapped(cls, 'basic_unit'):
+        return cls
 
     import torch.nn as nn
     assert issubclass(cls, nn.Module), 'When using @basic_unit, the class must be a subclass of nn.Module.'
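
To see the pickling workaround from the common serializer in isolation: plain pickle fails on
instances of a decorator-generated class because the class found under the module name is not the
object being pickled. The sketch below reproduces the pattern under simplifying assumptions; the
name ``_restore`` is invented, and, like the real ``_pickling_object`` path, it relies on
cloudpickle falling back to serializing the inner class by value:

    import pickle
    import cloudpickle

    class _restore:
        # module-level stand-in that pickle can locate by name
        def __new__(cls, inner_blob, state):
            inner_type = cloudpickle.loads(inner_blob)  # recover the real class
            obj = object.__new__(inner_type)            # bypass __init__, as pickle does
            obj.__dict__.update(state)
            return obj

    def traced(cls):
        class wrapper(cls):
            def __reduce__(self):
                # hand pickle a locatable callable plus the full instance state
                return _restore, (cloudpickle.dumps(cls), dict(self.__dict__))
        return wrapper

    @traced
    class Point:
        def __init__(self, x, y):
            self.x, self.y = x, y

    p = pickle.loads(pickle.dumps(Point(1, 2)))
    assert (p.x, p.y) == (1, 2)

The real implementation additionally re-applies the trace wrapper on load and round-trips
``_nni_symbol`` through cloudpickle, as shown in ``_pickling_object`` above.
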
@@ -72,15 +73,7 @@ class PrimitiveOp(nn.Module): cls = trace(cls) cls._nni_basic_unit = basic_unit_tag - # HACK: for torch script - # https://github.com/pytorch/pytorch/pull/45261 - # https://github.com/pytorch/pytorch/issues/54688 - # I'm not sure whether there will be potential issues - import torch - cls._get_nni_attr = torch.jit.ignore(cls._get_nni_attr) - cls.trace_symbol = torch.jit.unused(cls.trace_symbol) - cls.trace_args = torch.jit.unused(cls.trace_args) - cls.trace_kwargs = torch.jit.unused(cls.trace_kwargs) + _torchscript_patch(cls) return cls @@ -103,12 +96,14 @@ class MyModel(nn.Module): Currently, NNI might not complain in simple cases where ``@model_wrapper`` is actually not needed. But in future, we might enforce ``@model_wrapper`` to be required for base model. """ - _check_wrapped(cls) + if _check_wrapped(cls, 'model_wrapper'): + return cls import torch.nn as nn assert issubclass(cls, nn.Module) - wrapper = trace(cls) + # subclass can still use trace info + wrapper = trace(cls, inheritable=True) class reset_wrapper(wrapper): def __init__(self, *args, **kwargs): @@ -116,8 +111,12 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) _copy_class_wrapper_attributes(wrapper, reset_wrapper) - reset_wrapper.__wrapped__ = wrapper.__wrapped__ + reset_wrapper.__wrapped__ = getattr(wrapper, '__wrapped__', wrapper) reset_wrapper._nni_model_wrapper = True + reset_wrapper._traced = True + + _torchscript_patch(cls) + return reset_wrapper @@ -133,6 +132,32 @@ def is_model_wrapped(cls_or_instance) -> bool: return getattr(cls_or_instance, '_nni_model_wrapper', False) -def _check_wrapped(cls: T) -> bool: - if getattr(cls, '_traced', False) or getattr(cls, '_nni_model_wrapper', False): - raise TypeError(f'{cls} is already wrapped with trace wrapper (basic_unit / model_wrapper / trace). Cannot wrap again.') +def _check_wrapped(cls: T, rewrap: str) -> bool: + wrapped = None + if is_model_wrapped(cls): + wrapped = 'model_wrapper' + elif is_basic_unit(cls): + wrapped = 'basic_unit' + elif is_wrapped_with_trace(cls): + wrapped = 'nni.trace' + if wrapped: + if wrapped != rewrap: + raise TypeError(f'{cls} is already wrapped with {wrapped}. 
Cannot rewrap with {rewrap}.') + return True + return False + + +def _torchscript_patch(cls) -> None: + # HACK: for torch script + # https://github.com/pytorch/pytorch/pull/45261 + # https://github.com/pytorch/pytorch/issues/54688 + # I'm not sure whether there will be potential issues + import torch + if hasattr(cls, '_get_nni_attr'): # could not exist on non-linux + cls._get_nni_attr = torch.jit.ignore(cls._get_nni_attr) + if hasattr(cls, 'trace_symbol'): + # these must all exist or all non-exist + cls.trace_symbol = torch.jit.unused(cls.trace_symbol) + cls.trace_args = torch.jit.unused(cls.trace_args) + cls.trace_kwargs = torch.jit.unused(cls.trace_kwargs) + cls.trace_copy = torch.jit.ignore(cls.trace_copy) diff --git a/test/ut/retiarii/test_cgo_engine.py b/test/ut/retiarii/test_cgo_engine.py index 0b8fa626b5..b7b3b85d2e 100644 --- a/test/ut/retiarii/test_cgo_engine.py +++ b/test/ut/retiarii/test_cgo_engine.py @@ -9,6 +9,7 @@ from pathlib import Path import nni +import nni.runtime.platform.test try: from nni.common.device import GPUDevice diff --git a/test/ut/sdk/test_serializer.py b/test/ut/sdk/test_serializer.py index 6a7515b67c..ab5049c6af 100644 --- a/test/ut/sdk/test_serializer.py +++ b/test/ut/sdk/test_serializer.py @@ -1,4 +1,5 @@ import math +import pickle import sys from pathlib import Path @@ -27,6 +28,11 @@ def __init__(self, a, b=1): self._b = b +@nni.trace +class EmptyClass: + pass + + class UnserializableSimpleClass: def __init__(self): self._a = 1 @@ -124,7 +130,8 @@ def test_custom_class(): module = nni.trace(Foo)(Foo(1), 5) dumped_module = nni.dump(module) - assert len(dumped_module) > 200 # should not be too longer if the serialization is correct + module = nni.load(dumped_module) + assert module.bb[0] == module.bb[999] == 6 module = nni.trace(Foo)(nni.trace(Foo)(1), 5) dumped_module = nni.dump(module) @@ -193,6 +200,20 @@ def test_dataset(): assert y.size() == torch.Size([10]) +def test_pickle(): + pickle.dumps(EmptyClass()) + obj = SimpleClass(1) + obj = pickle.loads(pickle.dumps(obj)) + + assert obj._a == 1 + assert obj._b == 1 + + obj = SimpleClass(1) + obj.xxx = 3 + obj = pickle.loads(pickle.dumps(obj)) + assert obj.xxx == 3 + + @pytest.mark.skipif(sys.platform != 'linux', reason='https://github.com/microsoft/nni/issues/4434') def test_multiprocessing_dataloader(): # check whether multi-processing works @@ -208,6 +229,28 @@ def test_multiprocessing_dataloader(): assert y.size() == torch.Size([10]) +def _test_multiprocessing_dataset_worker(dataset): + if sys.platform == 'linux': + # on non-linux, the loaded object will become non-traceable + # due to an implementation limitation + assert is_traceable(dataset) + else: + from torch.utils.data import Dataset + assert isinstance(dataset, Dataset) + + +def test_multiprocessing_dataset(): + from torch.utils.data import Dataset + + dataset = nni.trace(Dataset)() + + import multiprocessing + process = multiprocessing.Process(target=_test_multiprocessing_dataset_worker, args=(dataset, )) + process.start() + process.join() + assert process.exitcode == 0 + + def test_type(): assert nni.dump(torch.optim.Adam) == '{"__nni_type__": "path:torch.optim.adam.Adam"}' assert nni.load('{"__nni_type__": "path:torch.optim.adam.Adam"}') == torch.optim.Adam @@ -220,10 +263,20 @@ def test_lightning_earlystop(): import nni.retiarii.evaluator.pytorch.lightning as pl from pytorch_lightning.callbacks.early_stopping import EarlyStopping trainer = pl.Trainer(callbacks=[nni.trace(EarlyStopping)(monitor="val_loss")]) - trainer = 
nni.load(nni.dump(trainer))
+    pickle_size_limit = 4096 if sys.platform == 'linux' else 32768
+    trainer = nni.load(nni.dump(trainer, pickle_size_limit=pickle_size_limit))
     assert any(isinstance(callback, EarlyStopping) for callback in trainer.callbacks)
 
 
+def test_pickle_trainer():
+    import nni.retiarii.evaluator.pytorch.lightning as pl
+    from pytorch_lightning import Trainer
+    trainer = pl.Trainer(max_epochs=1)
+    data = pickle.dumps(trainer)
+    trainer = pickle.loads(data)
+    assert isinstance(trainer, Trainer)
+
+
 def test_generator():
     import torch.nn as nn
     import torch.optim as optim
@@ -272,11 +325,31 @@ def foo(a, *, b=3, c=5):
     assert lstm.trace_kwargs == {'input_size': 2, 'hidden_size': 2}
 
 
-if __name__ == '__main__':
-    # test_simple_class()
-    # test_external_class()
-    # test_nested_class()
-    # test_unserializable()
-    # test_basic_unit()
-    # test_generator()
-    test_arguments_kind()
+def test_subclass():
+    @nni.trace
+    class Super:
+        def __init__(self, a, b):
+            self._a = a
+            self._b = b
+
+    class Sub1(Super):
+        def __init__(self, c, d):
+            super().__init__(3, 4)
+            self._c = c
+            self._d = d
+
+    @nni.trace
+    class Sub2(Super):
+        def __init__(self, c, d):
+            super().__init__(3, 4)
+            self._c = c
+            self._d = d
+
+    obj = Sub1(1, 2)
+    # There could be trace_kwargs for obj. Behavior is undefined.
+    assert obj._a == 3 and obj._c == 1
+    assert isinstance(obj, Super)
+
+    obj = Sub2(1, 2)
+    assert obj.trace_kwargs == {'c': 1, 'd': 2}
+    assert issubclass(type(obj), Super)
+    assert isinstance(obj, Super)

From 3836689f2caed97619a0216f8f2c5a1f1aaff8e8 Mon Sep 17 00:00:00 2001
From: Ningxin Zheng <49771382+zheng-ningxin@users.noreply.github.com>
Date: Fri, 4 Mar 2022 16:44:10 +0800
Subject: [PATCH 08/14] issue 4540 (#4594)

---
 nni/compression/pytorch/speedup/infer_mask.py | 10 +++--
 .../pytorch/utils/shape_dependency.py         |  8 +++-
 test/ut/compression/v1/test_model_speedup.py  | 40 +++++++++++++++++++
 3 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/nni/compression/pytorch/speedup/infer_mask.py b/nni/compression/pytorch/speedup/infer_mask.py
index 8ace639207..312f60ecd4 100644
--- a/nni/compression/pytorch/speedup/infer_mask.py
+++ b/nni/compression/pytorch/speedup/infer_mask.py
@@ -171,10 +171,14 @@ def __apply_input_mask(self):
         # apply the input mask
         for tid, in_tensor in enumerate(self.dummy_input):
             if isinstance(in_tensor, torch.Tensor) and self.in_masks[tid] is not None:
+                # in_tensor.data = in_tensor.data * \
+                #     self.in_masks[tid] + \
+                #     (1-self.in_masks[tid]) * self.in_constants[tid]
+                # issue-4540 when two tensors are multiplied, the constants part makes
+                # the propagation weaker, and leads to shape misalignment. Currently, we
+                # do not support constant folding, so we just remove the constant here
                 in_tensor.data = in_tensor.data * \
-                    self.in_masks[tid] + \
-                    (1-self.in_masks[tid]) * self.in_constants[tid]
-
+                    self.in_masks[tid]
 
     def __apply_weight_mask(self):
         """
diff --git a/nni/compression/pytorch/utils/shape_dependency.py b/nni/compression/pytorch/utils/shape_dependency.py
index f972212a5a..436e84139b 100644
--- a/nni/compression/pytorch/utils/shape_dependency.py
+++ b/nni/compression/pytorch/utils/shape_dependency.py
@@ -163,7 +163,13 @@ def build_dependency(self):
             parent_layers = []
             # find the node that contains aten::add
             # or aten::cat operations
-            if node.op_type in ADD_TYPES:
+            if node.op_type in ADD_TYPES or node.op_type in MUL_TYPES:
+                # refer to issue 4540 for more details. Multiplication actually
+                # will not introduce the channel dependency, because the misaligned
+                # channels can propagate to each other. However, when one of the input
+                # tensors comes from a skip connection (residual), the channel propagation
+                # may fail (the input is also used by another layer and cannot be
+                # pruned); in this case, we need to fix the conflict manually.
                 parent_layers = self._get_parent_layers(node)
             elif node.op_type == CAT_TYPE:
                 # To determine if this cat operation will introduce channel
diff --git a/test/ut/compression/v1/test_model_speedup.py b/test/ut/compression/v1/test_model_speedup.py
index 9d0ff7cf86..a70f010f6d 100644
--- a/test/ut/compression/v1/test_model_speedup.py
+++ b/test/ut/compression/v1/test_model_speedup.py
@@ -512,6 +512,46 @@ def forward(self, x):
         print("Fine-grained speeduped model")
         print(model)
 
+    def test_multiplication_speedup(self):
+        """
+        Model from issue 4540.
+        """
+        class Net(torch.nn.Module):
+            def __init__(self,):
+                super(Net, self).__init__()
+                self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
+                self.input = torch.nn.Conv2d(3, 8, 3)
+                self.bn = torch.nn.BatchNorm2d(8)
+                self.fc1 = torch.nn.Conv2d(8, 16, 1)
+                self.fc2 = torch.nn.Conv2d(16, 8, 1)
+                self.activation = torch.nn.ReLU()
+                self.scale_activation = torch.nn.Hardsigmoid()
+                self.out = torch.nn.Conv2d(8, 12, 1)
+
+            def forward(self, input):
+                input = self.activation(self.bn(self.input(input)))
+                scale = self.avgpool(input)
+                out1 = self.activation(self.fc1(scale))
+                out1 = self.scale_activation(self.fc2(out1))
+                return self.out(out1 * input)
+
+        model = Net().to(device)
+        model.eval()
+        im = torch.ones(1, 3, 512, 512).to(device)
+        model(im)
+        cfg_list = []
+
+        for name, module in model.named_modules():
+            if isinstance(module, torch.nn.Conv2d):
+                cfg_list.append({'op_types':['Conv2d'], 'sparsity':0.3, 'op_names':[name]})
+
+        pruner = L1FilterPruner(model, cfg_list)
+        pruner.compress()
+        pruner.export_model(MODEL_FILE, MASK_FILE)
+        pruner._unwrap_model()
+        ms=ModelSpeedup(model, im, MASK_FILE)
+        ms.speedup_model()
+
     def tearDown(self):
         if os.path.exists(MODEL_FILE):
             os.remove(MODEL_FILE)

From e0cebd67c1d67a86385c48f1658d69063d5d882a Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 4 Mar 2022 16:55:59 +0800
Subject: [PATCH 09/14] Bump url-parse from 1.5.7 to 1.5.10 in /ts/jupyter_extension (#4597)

---
 ts/jupyter_extension/yarn.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ts/jupyter_extension/yarn.lock b/ts/jupyter_extension/yarn.lock
index b8ae1f6065..3b4854d8c2 100644
--- a/ts/jupyter_extension/yarn.lock
+++ b/ts/jupyter_extension/yarn.lock
@@ -2907,9 +2907,9 @@ url-parse-lax@^3.0.0:
     prepend-http "^2.0.0"
 
 url-parse@~1.5.1:
-  version "1.5.7"
-  resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.7.tgz#00780f60dbdae90181f51ed85fb24109422c932a"
-  integrity sha512-HxWkieX+STA38EDk7CE9MEryFeHCKzgagxlGvsdS7WBImq9Mk+PGwiT56w82WI3aicwJA8REp42Cxo98c8FZMA==
+  version "1.5.10"
+  resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.10.tgz#9d3c2f736c1d75dd3bd2be507dcc111f1e2ea9c1"
+  integrity sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ==
   dependencies:
     querystringify "^2.1.1"
     requires-port "^1.0.0"

From 358ea2ebdb4771b958926d73b0f3a48357e28536 Mon Sep 17 00:00:00 2001
From: zzp_miracle <172422511@qq.com>
Date: Sun, 6 Mar 2022 12:34:20 +0800
Subject: [PATCH 10/14] remove redundant Chinese document (#4615)

---
 README_zh_CN.md | 2 +-
 1 file
changed, 1 insertion(+), 1 deletion(-) diff --git a/README_zh_CN.md b/README_zh_CN.md index 9e551c8ef5..33de8dfce9 100644 --- a/README_zh_CN.md +++ b/README_zh_CN.md @@ -354,7 +354,7 @@ NNI 有一个月度发布周期(主要发布)。 如果您遇到问题可以 * [OpenPAI](https://github.com/Microsoft/pai):作为开源平台,提供了完整的 AI 模型训练和资源管理能力,能轻松扩展,并支持各种规模的私有部署、云和混合环境。 * [FrameworkController](https://github.com/Microsoft/frameworkcontroller):开源的通用 Kubernetes Pod 控制器,通过单个控制器来编排 Kubernetes 上所有类型的应用。 -* [MMdnn](https://github.com/Microsoft/MMdnn):一个完整、跨框架的解决方案,能够转换、可视化、诊断深度神经网络模型。 MMdnn 中的 "MM" 表示 model management(模型管理),而 "dnn" 是 deep neural network(深度神经网络)的缩写。 MMdnn 中的 "MM" 表示 model management(模型管理),而 "dnn" 是 deep neural network(深度神经网络)的缩写。 +* [MMdnn](https://github.com/Microsoft/MMdnn):一个完整、跨框架的解决方案,能够转换、可视化、诊断深度神经网络模型。 MMdnn 中的 "MM" 表示 model management(模型管理),而 "dnn" 是 deep neural network(深度神经网络)的缩写。 * [SPTAG](https://github.com/Microsoft/SPTAG) : Space Partition Tree And Graph (SPTAG) 是用于大规模向量的最近邻搜索场景的开源库。 我们鼓励研究人员和学生利用这些项目来加速 AI 开发和研究。 From 68ca6f21ee0d163fa17c5e303f470da84c0c4c97 Mon Sep 17 00:00:00 2001 From: Jiahang Xu Date: Mon, 7 Mar 2022 16:22:50 +0800 Subject: [PATCH 11/14] Fix bug Issue4592 (#4614) --- examples/nas/multi-trial/mnasnet/base_mnasnet.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/nas/multi-trial/mnasnet/base_mnasnet.py b/examples/nas/multi-trial/mnasnet/base_mnasnet.py index f431812e3c..c4db4c7761 100644 --- a/examples/nas/multi-trial/mnasnet/base_mnasnet.py +++ b/examples/nas/multi-trial/mnasnet/base_mnasnet.py @@ -4,8 +4,8 @@ import torch import torch.nn as torch_nn -from torchvision.models.utils import load_state_dict_from_url import torch.nn.functional as F +from nni.retiarii import model_wrapper import sys from pathlib import Path @@ -111,7 +111,7 @@ def _get_depths(depths, alpha): rather than down. """ return [_round_to_multiple_of(depth * alpha, 8) for depth in depths] - +@model_wrapper class MNASNet(nn.Module): """ MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This implements the B1 variant of the model. 
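
For context on the ``@model_wrapper`` added above: it is applied to the outermost module of a
Retiarii search space, mirroring how the test models earlier in this patch series are written.
A minimal usage sketch, where the layer sizes are arbitrary:

    import nni.retiarii.nn.pytorch as nn
    from nni.retiarii import model_wrapper

    @model_wrapper  # applied once, on the top-level module only
    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            # a mutable choice between two candidate convolutions
            self.conv = nn.LayerChoice([
                nn.Conv2d(3, 8, kernel_size=3, padding=1),
                nn.Conv2d(3, 8, kernel_size=5, padding=2),
            ])
            self.head = nn.Linear(8, 10)

        def forward(self, x):
            x = self.conv(x).mean(dim=[2, 3])  # global average pooling
            return self.head(x)

Inner modules are left undecorated; mutables declared anywhere inside the wrapped module are
discovered when the search space is parsed.
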
@@ -180,7 +180,7 @@ def __init__(self, alpha, depths, convops, kernel_sizes, num_layers, nn.ReLU(inplace=True), ] self.layers = nn.Sequential(*layers) - self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True), + self.classifier = nn.Sequential(nn.Dropout(p=dropout), nn.Linear(1280, num_classes)) self._initialize_weights() #self.for_test = 10 From 0a52ae6f98bdc6330a553f0e60c5b0618ab60df8 Mon Sep 17 00:00:00 2001 From: Ningxin Zheng <49771382+zheng-ningxin@users.noreply.github.com> Date: Fri, 11 Mar 2022 16:57:54 +0800 Subject: [PATCH 12/14] support flatten (#4628) --- nni/compression/pytorch/speedup/compress_modules.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nni/compression/pytorch/speedup/compress_modules.py b/nni/compression/pytorch/speedup/compress_modules.py index 64cb0abfbb..94a0517b2e 100644 --- a/nni/compression/pytorch/speedup/compress_modules.py +++ b/nni/compression/pytorch/speedup/compress_modules.py @@ -41,7 +41,8 @@ 'Dropout3d': lambda module, masks: no_replace(module, masks), 'Upsample': lambda module, masks: no_replace(module, masks), 'LayerNorm': lambda module, masks: replace_layernorm(module, masks), - 'ConvTranspose2d': lambda module, masks: replace_convtranspose2d(module, masks) + 'ConvTranspose2d': lambda module, masks: replace_convtranspose2d(module, masks), + 'Flatten': lambda module, masks: no_replace(module, masks) } From 2499be70fcf6cd74e50813a6a3dfd945a363feb6 Mon Sep 17 00:00:00 2001 From: liuzhe-lz <40699903+liuzhe-lz@users.noreply.github.com> Date: Fri, 11 Mar 2022 16:58:47 +0800 Subject: [PATCH 13/14] update contrib.rocks url (#4624) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b17838c96a..198a74d817 100644 --- a/README.md +++ b/README.md @@ -314,7 +314,7 @@ To learn more about making a contribution to NNI, please refer to our [How-to co We appreciate all contributions and thank all the contributors! 
-
+

## **Feedback**

From 3b27ac762f0928b627dfde049e7440a69f43fe6a Mon Sep 17 00:00:00 2001
From: liuzhe-lz <40699903+liuzhe-lz@users.noreply.github.com>
Date: Mon, 21 Mar 2022 01:40:31 +0800
Subject: [PATCH 14/14] Refactor NNI manager globals (step 1) - argparse (#4510)

---
 nni/experiment/experiment.py               |  8 +-
 nni/experiment/launcher.py                 | 44 ++++-----
 nni/tools/nnictl/legacy_launcher.py        | 16 ++--
 ts/nni_manager/common/globals/arguments.ts | 96 +++++++++++++++++++
 ts/nni_manager/main.ts                     | 92 ++++--------------
 ts/nni_manager/package.json                |  4 +-
 .../test/common/globals/arguments.test.ts  | 69 +++++++++++++
 ts/nni_manager/yarn.lock                   | 27 +++++-
 8 files changed, 242 insertions(+), 114 deletions(-)
 create mode 100644 ts/nni_manager/common/globals/arguments.ts
 create mode 100644 ts/nni_manager/test/common/globals/arguments.test.ts

diff --git a/nni/experiment/experiment.py b/nni/experiment/experiment.py
index 57ff2356bf..e3ddffe2a8 100644
--- a/nni/experiment/experiment.py
+++ b/nni/experiment/experiment.py
@@ -91,7 +91,7 @@ def __init__(self, config=None, training_service=None):
         self.id: str = management.generate_experiment_id()
         self.port: Optional[int] = None
         self._proc: Optional[Popen] = None
-        self.mode = 'new'
+        self.action = 'create'
         self.url_prefix: Optional[str] = None
         args = [config, training_service]  # deal with overloading
@@ -127,7 +127,7 @@ def start(self, port: int = 8080, debug: bool = False, run_mode: RunMode = RunMo
         log_dir = Path.home() / f'nni-experiments/{self.id}/log'
         nni.runtime.log.start_experiment_log(self.id, log_dir, debug)
-        self._proc = launcher.start_experiment(self.mode, self.id, config, port, debug, run_mode, self.url_prefix)
+        self._proc = launcher.start_experiment(self.action, self.id, config, port, debug, run_mode, self.url_prefix)
         assert self._proc is not None
         self.port = port  # port will be None if start up failed
@@ -261,7 +261,7 @@ def view(experiment_id: str, port: int = 8080, non_blocking: bool = False):
 def _resume(exp_id, exp_dir=None):
     exp = Experiment()
     exp.id = exp_id
-    exp.mode = 'resume'
+    exp.action = 'resume'
     exp.config = launcher.get_stopped_experiment_config(exp_id, exp_dir)
     return exp
@@ -269,7 +269,7 @@ def _view(exp_id, exp_dir=None):
     exp = Experiment()
     exp.id = exp_id
-    exp.mode = 'view'
+    exp.action = 'view'
     exp.config = launcher.get_stopped_experiment_config(exp_id, exp_dir)
     return exp
diff --git a/nni/experiment/launcher.py b/nni/experiment/launcher.py
index 8eff304cca..f7abbf2530 100644
--- a/nni/experiment/launcher.py
+++ b/nni/experiment/launcher.py
@@ -27,23 +27,27 @@
 @dataclass(init=False)
 class NniManagerArgs:
+    # argv sent to "ts/nni_manager/main.js"
+    port: int
     experiment_id: str
-    start_mode: str  # new or resume
-    mode: str  # training service platform
-    log_dir: str
+    action: str  # 'create', 'resume', 'view'
+    mode: str  # training service platform, to be removed
+    experiments_directory: str  # renamed "config.experiment_working_directory", must be absolute
     log_level: str
-    readonly: bool = False
     foreground: bool = False
-    url_prefix: Optional[str] = None
+    url_prefix: Optional[str] = None  # leading and trailing "/" must be stripped
     dispatcher_pipe: Optional[str] = None

     def __init__(self, action, exp_id, config, port, debug, foreground, url_prefix):
         self.port = port
         self.experiment_id = exp_id
+        self.action = action
         self.foreground = foreground
         self.url_prefix = url_prefix
-        self.log_dir = config.experiment_working_directory
+        # config field name "experiment_working_directory" is a mistake
+        # see 
"ts/nni_manager/common/globals/arguments.ts" for details + self.experiments_directory = config.experiment_working_directory if isinstance(config.training_service, list): self.mode = 'hybrid' @@ -54,20 +58,14 @@ def __init__(self, action, exp_id, config, port, debug, foreground, url_prefix): if debug and self.log_level not in ['debug', 'trace']: self.log_level = 'debug' - if action == 'resume': - self.start_mode = 'resume' - elif action == 'view': - self.start_mode = 'resume' - self.readonly = True - else: - self.start_mode = 'new' - def to_command_line_args(self): + # reformat fields to meet yargs library's format + # see "ts/nni_manager/common/globals/arguments.ts" for details ret = [] for field in fields(self): value = getattr(self, field.name) if value is not None: - ret.append('--' + field.name) + ret.append('--' + field.name.replace('_', '-')) if isinstance(value, bool): ret.append(str(value).lower()) else: @@ -76,6 +74,8 @@ def to_command_line_args(self): def start_experiment(action, exp_id, config, port, debug, run_mode, url_prefix): foreground = run_mode.value == 'foreground' + if url_prefix is not None: + url_prefix = url_prefix.strip('/') nni_manager_args = NniManagerArgs(action, exp_id, config, port, debug, foreground, url_prefix) _ensure_port_idle(port) @@ -135,7 +135,7 @@ def _start_rest_server(nni_manager_args, run_mode) -> Tuple[int, Popen]: cmd += nni_manager_args.to_command_line_args() if run_mode.value == 'detach': - log = Path(nni_manager_args.log_dir, nni_manager_args.experiment_id, 'log') + log = Path(nni_manager_args.experiments_directory, nni_manager_args.experiment_id, 'log') out = (log / 'nnictl_stdout.log').open('a') err = (log / 'nnictl_stderr.log').open('a') header = f'Experiment {nni_manager_args.experiment_id} start: {datetime.now()}' @@ -201,7 +201,7 @@ def _ensure_port_idle(port: int, message: Optional[str] = None) -> None: def _start_rest_server_retiarii(config: ExperimentConfig, port: int, debug: bool, experiment_id: str, - pipe_path: str = None, mode: str = 'new') -> Tuple[int, Popen]: + pipe_path: str, mode: str = 'create') -> Tuple[int, Popen]: if isinstance(config.training_service, list): ts = 'hybrid' else: @@ -213,24 +213,20 @@ def _start_rest_server_retiarii(config: ExperimentConfig, port: int, debug: bool 'port': port, 'mode': ts, 'experiment_id': experiment_id, - 'start_mode': mode, - 'log_dir': config.experiment_working_directory, + 'action': mode, + 'experiments_directory': config.experiment_working_directory, 'log_level': 'debug' if debug else 'info' } if pipe_path is not None: args['dispatcher_pipe'] = pipe_path - if mode == 'view': - args['start_mode'] = 'resume' - args['readonly'] = 'true' - import nni_node node_dir = Path(nni_node.__path__[0]) node = str(node_dir / ('node.exe' if sys.platform == 'win32' else 'node')) main_js = str(node_dir / 'main.js') cmd = [node, '--max-old-space-size=4096', main_js] for arg_key, arg_value in args.items(): - cmd.append('--' + arg_key) + cmd.append('--' + arg_key.replace('_', '-')) cmd.append(str(arg_value)) if sys.platform == 'win32': diff --git a/nni/tools/nnictl/legacy_launcher.py b/nni/tools/nnictl/legacy_launcher.py index 8641d11d5d..f72614a99c 100644 --- a/nni/tools/nnictl/legacy_launcher.py +++ b/nni/tools/nnictl/legacy_launcher.py @@ -70,21 +70,17 @@ def start_rest_server(port, platform, mode, experiment_id, foreground=False, log node_command = os.path.join(entry_dir, 'node') cmds = [node_command, '--max-old-space-size=4096', entry_file, '--port', str(port), '--mode', platform, \ '--experiment_id', 
experiment_id]
-    if mode == 'view':
-        cmds += ['--start_mode', 'resume']
-        cmds += ['--readonly', 'true']
-    else:
-        cmds += ['--start_mode', mode]
+    cmds += ['--action', mode]
     if log_dir is not None:
-        cmds += ['--log_dir', log_dir]
+        cmds += ['--experiments-directory', log_dir]
     if log_level is not None:
-        cmds += ['--log_level', log_level]
+        cmds += ['--log-level', log_level]
     if foreground:
         cmds += ['--foreground', 'true']
     if url_prefix:
         _validate_prefix_path(url_prefix)
         set_prefix_url(url_prefix)
-        cmds += ['--url_prefix', url_prefix]
+        cmds += ['--url-prefix', url_prefix.strip('/')]

     stdout_full_path, stderr_full_path = get_log_path(experiment_id)
     with open(stdout_full_path, 'a+') as stdout_file, open(stderr_full_path, 'a+') as stderr_file:
@@ -520,9 +516,9 @@ def create_experiment(args):

     try:
         if schema == 1:
-            launch_experiment(args, config_v1, 'new', experiment_id, 1)
+            launch_experiment(args, config_v1, 'create', experiment_id, 1)
         else:
-            launch_experiment(args, config_v2, 'new', experiment_id, 2)
+            launch_experiment(args, config_v2, 'create', experiment_id, 2)
     except Exception as exception:
         restServerPid = Experiments().get_all_experiments().get(experiment_id, {}).get('pid')
         if restServerPid:
diff --git a/ts/nni_manager/common/globals/arguments.ts b/ts/nni_manager/common/globals/arguments.ts
new file mode 100644
index 0000000000..f3973b59a6
--- /dev/null
+++ b/ts/nni_manager/common/globals/arguments.ts
@@ -0,0 +1,96 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+/**
+ * Parse NNI manager's command line arguments.
+ **/
+
+import assert from 'assert/strict';
+
+import yargs from 'yargs/yargs';
+
+/**
+ * Command line arguments provided by "nni/experiment/launcher.py".
+ *
+ * Hyphen-separated words are automatically converted to camelCase by the yargs library, but snake_cases are not.
+ * So it supports "--log-level" but does not support "--log_level".
+ *
+ * Unfortunately I misunderstood the "experiment_working_directory" config field when deciding the name.
+ * It defaults to "~/nni-experiments" rather than "~/nni-experiments/<experiment-id>",
+ * and furthermore the working directory is "site-packages/nni_node", not either.
+ * For compatibility concerns we cannot change the public API, so there is an inconsistency here. 
+ **/
+export interface NniManagerArgs {
+    readonly port: number;
+    readonly experimentId: string;
+    readonly action: 'create' | 'resume' | 'view';
+    readonly experimentsDirectory: string;  // renamed "config.experiment_working_directory", must be absolute
+    readonly logLevel: 'critical' | 'error' | 'warning' | 'info' | 'debug';
+    readonly foreground: boolean;
+    readonly urlPrefix: string;  // leading and trailing "/" must be stripped
+
+    // these are planned to be removed
+    readonly mode: string;
+    readonly dispatcherPipe: string | undefined;
+}
+
+export function parseArgs(rawArgs: string[]): NniManagerArgs {
+    const parser = yargs(rawArgs).options(yargsOptions).strict().fail((_msg, err, _yargs) => { throw err; });
+    const parsedArgs: NniManagerArgs = parser.parseSync();
+
+    // strip yargs leftovers
+    const argsAsAny: any = {};
+    for (const key in yargsOptions) {
+        argsAsAny[key] = (parsedArgs as any)[key];
+        assert(!Number.isNaN(argsAsAny[key]), `Command line arg --${key} is not a number`);
+    }
+    if (argsAsAny.dispatcherPipe === '') {
+        argsAsAny.dispatcherPipe = undefined;
+    }
+    const args: NniManagerArgs = argsAsAny;
+
+    const prefixErrMsg = `Command line arg --url-prefix "${args.urlPrefix}" is not stripped`;
+    assert(!args.urlPrefix.startsWith('/') && !args.urlPrefix.endsWith('/'), prefixErrMsg);
+
+    return args;
+}
+
+const yargsOptions = {
+    port: {
+        demandOption: true,
+        type: 'number'
+    },
+    experimentId: {
+        demandOption: true,
+        type: 'string'
+    },
+    action: {
+        choices: [ 'create', 'resume', 'view' ] as const,
+        demandOption: true
+    },
+    experimentsDirectory: {
+        demandOption: true,
+        type: 'string'
+    },
+    logLevel: {
+        choices: [ 'critical', 'error', 'warning', 'info', 'debug' ] as const,
+        demandOption: true
+    },
+    foreground: {
+        default: false,
+        type: 'boolean'
+    },
+    urlPrefix: {
+        default: '',
+        type: 'string'
+    },
+
+    mode: {
+        default: '',
+        type: 'string'
+    },
+    dispatcherPipe: {
+        default: '',
+        type: 'string'
+    }
+} as const;
diff --git a/ts/nni_manager/main.ts b/ts/nni_manager/main.ts
index 5449e5b13c..c3a0574b8b 100644
--- a/ts/nni_manager/main.ts
+++ b/ts/nni_manager/main.ts
@@ -20,15 +20,11 @@ import { SqlDB } from './core/sqlDatabase';
 import { NNIExperimentsManager } from './core/nniExperimentsManager';
 import { NNITensorboardManager } from './core/nniTensorboardManager';
 import { RestServer } from './rest_server';
+import { parseArgs } from 'common/globals/arguments';

-function initStartupInfo(
-    startExpMode: string, experimentId: string, basePort: number, platform: string,
-    logDirectory: string, experimentLogLevel: string, readonly: boolean, dispatcherPipe: string, urlprefix: string): void {
-    const createNew: boolean = (startExpMode === ExperimentStartUpMode.NEW);
-    setExperimentStartupInfo(createNew, experimentId, basePort, platform, logDirectory, experimentLogLevel, readonly, dispatcherPipe, urlprefix);
-}
+const args = parseArgs(process.argv.slice(2));

-async function initContainer(foreground: boolean, _platformMode: string, logFileName?: string): Promise<void> {
+async function initContainer(): Promise<void> {
     Container.bind(Manager)
         .to(NNIManager)
         .scope(Scope.Singleton);
@@ -45,84 +41,32 @@ async function initContainer(): Promise<void> {
         .to(NNITensorboardManager)
         .scope(Scope.Singleton);
     const DEFAULT_LOGFILE: string = path.join(getLogDir(), 'nnimanager.log');
-    if (!foreground) {
-        if (logFileName === undefined) {
-            startLogging(DEFAULT_LOGFILE);
-        } else {
-            startLogging(logFileName);
-        }
+    if (!args.foreground) {
+        
startLogging(DEFAULT_LOGFILE);
     }
     // eslint-disable-next-line @typescript-eslint/no-use-before-define
-    setLogLevel(logLevel);
+    setLogLevel(args.logLevel);

     const ds: DataStore = component.get(DataStore);
     await ds.init();
 }

-function usage(): void {
-    console.info('usage: node main.js --port <port> --mode <mode> \
-        --start_mode <new | resume> --experiment_id <id> --foreground <true | false>');
-}
-
-const strPort: string = parseArg(['--port', '-p']);
-if (!strPort || strPort.length === 0) {
-    usage();
-    process.exit(1);
-}
-
-const foregroundArg: string = parseArg(['--foreground', '-f']);
-if (foregroundArg && !['true', 'false'].includes(foregroundArg.toLowerCase())) {
-    console.log(`FATAL: foreground property should only be true or false`);
-    usage();
-    process.exit(1);
-}
-const foreground: boolean = (foregroundArg && foregroundArg.toLowerCase() === 'true') ? true : false;
-
-const port: number = parseInt(strPort, 10);
-
-const mode: string = parseArg(['--mode', '-m']);
-
-const startMode: string = parseArg(['--start_mode', '-s']);
-if (![ExperimentStartUpMode.NEW, ExperimentStartUpMode.RESUME].includes(startMode)) {
-    console.log(`FATAL: unknown start_mode: ${startMode}`);
-    usage();
-    process.exit(1);
-}
-
-const experimentId: string = parseArg(['--experiment_id', '-id']);
-if (experimentId.trim().length < 1) {
-    console.log(`FATAL: cannot resume the experiment, invalid experiment_id: ${experimentId}`);
-    usage();
-    process.exit(1);
-}
-
-const logDir: string = parseArg(['--log_dir', '-ld']);
-if (logDir.length > 0) {
-    if (!fs.existsSync(logDir)) {
-        console.log(`FATAL: log_dir ${logDir} does not exist`);
-    }
-}
-
-const logLevel: string = parseArg(['--log_level', '-ll']);
-
-const readonlyArg: string = parseArg(['--readonly', '-r']);
-if (readonlyArg && !['true', 'false'].includes(readonlyArg.toLowerCase())) {
-    console.log(`FATAL: readonly property should only be true or false`);
-    usage();
-    process.exit(1);
-}
-const readonly = (readonlyArg && readonlyArg.toLowerCase() == 'true') ? true : false;
-
-const dispatcherPipe: string = parseArg(['--dispatcher_pipe']);
-
-const urlPrefix: string = parseArg(['--url_prefix']);
-
-initStartupInfo(startMode, experimentId, port, mode, logDir, logLevel, readonly, dispatcherPipe, urlPrefix);
+setExperimentStartupInfo(
+    args.action === 'create',
+    args.experimentId,
+    args.port,
+    args.mode,
+    args.experimentsDirectory,
+    args.logLevel,
+    args.action === 'view',
+    args.dispatcherPipe ?? 
'', + args.urlPrefix +); mkDirP(getLogDir()) .then(async () => { try { - await initContainer(foreground, mode); + await initContainer(); const restServer: RestServer = component.get(RestServer); await restServer.start(); } catch (err) { diff --git a/ts/nni_manager/package.json b/ts/nni_manager/package.json index 205ba066a5..f0aa3f5f75 100644 --- a/ts/nni_manager/package.json +++ b/ts/nni_manager/package.json @@ -33,7 +33,8 @@ "ts-deferred": "^1.0.4", "typescript-ioc": "^1.2.6", "typescript-string-operations": "^1.4.1", - "ws": "^7.4.6" + "ws": "^7.4.6", + "yargs": "^17.3.1" }, "devDependencies": { "@types/chai": "^4.2.18", @@ -55,6 +56,7 @@ "@types/tar": "^4.0.4", "@types/tmp": "^0.2.0", "@types/ws": "^7.4.4", + "@types/yargs": "^17.0.8", "@typescript-eslint/eslint-plugin": "^2.10.0", "@typescript-eslint/parser": "^4.26.0", "chai": "^4.3.4", diff --git a/ts/nni_manager/test/common/globals/arguments.test.ts b/ts/nni_manager/test/common/globals/arguments.test.ts new file mode 100644 index 0000000000..e7ed7d9ed8 --- /dev/null +++ b/ts/nni_manager/test/common/globals/arguments.test.ts @@ -0,0 +1,69 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +import assert from 'assert/strict'; + +import { parseArgs } from 'common/globals/arguments'; + +const command = '--port 80 --experiment-id ID --action resume --experiments-directory DIR --log-level error'; +const expected = { + port: 80, + experimentId: 'ID', + action: 'resume', + experimentsDirectory: 'DIR', + logLevel: 'error', + foreground: false, + urlPrefix: '', + + mode: '', + dispatcherPipe: undefined, +}; + +function testGoodShort(): void { + const args = parseArgs(command.split(' ')); + assert.deepEqual(args, expected); +} + +function testGoodLong(): void { + const cmd = command + ' --url-prefix URL/prefix --foreground true'; + const args = parseArgs(cmd.split(' ')); + const expectedLong = Object.assign({}, expected); + expectedLong.urlPrefix = 'URL/prefix'; + expectedLong.foreground = true; + assert.deepEqual(args, expectedLong); +} + +function testBadKey(): void { + const cmd = command + ' --bad 1'; + assert.throws(() => parseArgs(cmd.split(' '))); +} + +function testBadPos(): void { + const cmd = command.replace('--port', 'port'); + assert.throws(() => parseArgs(cmd.split(' '))); +} + +function testBadNum(): void { + const cmd = command.replace('80', '8o'); + assert.throws(() => parseArgs(cmd.split(' '))); +} + +function testBadBool(): void { + const cmd = command + ' --foreground 1'; + assert.throws(() => parseArgs(cmd.split(' '))); +} + +function testBadChoice(): void { + const cmd = command.replace('resume', 'new'); + assert.throws(() => parseArgs(cmd.split(' '))); +} + +describe('## globals.arguments ##', () => { + it('good short', () => testGoodShort()); + it('good long', () => testGoodLong()); + it('bad key arg', () => testBadKey()); + it('bad positional arg', () => testBadPos()); + it('bad number', () => testBadNum()); + it('bad boolean', () => testBadBool()); + it('bad choice', () => testBadChoice()); +}); diff --git a/ts/nni_manager/yarn.lock b/ts/nni_manager/yarn.lock index 2ab0712694..35093aa2a1 100644 --- a/ts/nni_manager/yarn.lock +++ b/ts/nni_manager/yarn.lock @@ -820,6 +820,18 @@ dependencies: "@types/node" "*" +"@types/yargs-parser@*": + version "20.2.1" + resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-20.2.1.tgz#3b9ce2489919d9e4fea439b76916abc34b2df129" + integrity sha512-7tFImggNeNBVMsn0vLrpn1H1uPrUBdnARPTpZoitY37ZrdJREzf7I16tMrlK3hen349gr1NYh8CmZQa7CTG6Aw== + 
+"@types/yargs@^17.0.8": + version "17.0.8" + resolved "https://registry.yarnpkg.com/@types/yargs/-/yargs-17.0.8.tgz#d23a3476fd3da8a0ea44b5494ca7fa677b9dad4c" + integrity sha512-wDeUwiUmem9FzsyysEwRukaEdDNcwbROvQ9QGRKaLI6t+IltNzbn4/i4asmB10auvZGQCzSQ6t0GSczEThlUXw== + dependencies: + "@types/yargs-parser" "*" + "@typescript-eslint/eslint-plugin@^2.10.0": version "2.34.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-2.34.0.tgz#6f8ce8a46c7dea4a6f1d171d2bb8fbae6dac2be9" @@ -5752,7 +5764,7 @@ yallist@^4.0.0: resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72" integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A== -yargs-parser@20.2.4, yargs-parser@>=20.2.7, yargs-parser@^18.1.2, yargs-parser@^20.2.2: +yargs-parser@20.2.4, yargs-parser@>=20.2.7, yargs-parser@^18.1.2, yargs-parser@^20.2.2, yargs-parser@^21.0.0: version "20.2.7" resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.7.tgz#61df85c113edfb5a7a4e36eb8aa60ef423cbc90a" integrity sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw== @@ -5797,6 +5809,19 @@ yargs@^15.0.2: y18n "^4.0.0" yargs-parser "^18.1.2" +yargs@^17.3.1: + version "17.3.1" + resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.3.1.tgz#da56b28f32e2fd45aefb402ed9c26f42be4c07b9" + integrity sha512-WUANQeVgjLbNsEmGk20f+nlHgOqzRFpiGWVaBrYGYIGANIIu3lWjoyi0fNlFmJkvfhCZ6BXINe7/W2O2bV4iaA== + dependencies: + cliui "^7.0.2" + escalade "^3.1.1" + get-caller-file "^2.0.5" + require-directory "^2.1.1" + string-width "^4.2.3" + y18n "^5.0.5" + yargs-parser "^21.0.0" + yn@3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"