From db91e8e6e4c60ef816d2f2f80b4e4c445be8a7ee Mon Sep 17 00:00:00 2001
From: SparkSnail <shinyang@microsoft.com>
Date: Wed, 25 Dec 2019 15:27:07 +0800
Subject: [PATCH 01/23] Support https in paiHost (#1873)

---
 src/nni_manager/training_service/pai/paiConfig.ts  |  2 +-
 .../training_service/pai/paiJobInfoCollector.ts    |  2 +-
 .../pai/paiK8S/paiK8STrainingService.ts            |  3 ++-
 .../training_service/pai/paiTrainingService.ts     | 14 ++++++++++++--
 .../pai/paiYarn/paiYarnTrainingService.ts          |  4 ++--
 5 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/nni_manager/training_service/pai/paiConfig.ts b/src/nni_manager/training_service/pai/paiConfig.ts
index c8f1c414fc..d6c405b0aa 100644
--- a/src/nni_manager/training_service/pai/paiConfig.ts
+++ b/src/nni_manager/training_service/pai/paiConfig.ts
@@ -9,7 +9,7 @@ import { TrialJobApplicationForm, TrialJobDetail, TrialJobStatus  } from '../../
 export class PAIClusterConfig {
     public readonly userName: string;
     public readonly passWord?: string;
-    public readonly host: string;
+    public host: string;
     public readonly token?: string;
 
     /**
diff --git a/src/nni_manager/training_service/pai/paiJobInfoCollector.ts b/src/nni_manager/training_service/pai/paiJobInfoCollector.ts
index ce50d4cd57..07ec1cda7f 100644
--- a/src/nni_manager/training_service/pai/paiJobInfoCollector.ts
+++ b/src/nni_manager/training_service/pai/paiJobInfoCollector.ts
@@ -52,7 +52,7 @@ export class PAIJobInfoCollector {
         // Rest call to get PAI job info and update status
         // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
         const getJobInfoRequest: request.Options = {
-            uri: `http://${paiClusterConfig.host}/rest-server/api/v1/user/${paiClusterConfig.userName}/jobs/${paiTrialJob.paiJobName}`,
+            uri: `${paiClusterConfig.host}/rest-server/api/v1/user/${paiClusterConfig.userName}/jobs/${paiTrialJob.paiJobName}`,
             method: 'GET',
             json: true,
                headers: {
diff --git a/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts b/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
index 4b0a0f81a2..2888a9f425 100644
--- a/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
@@ -68,6 +68,7 @@ class PAIK8STrainingService extends PAITrainingService {
                 } else if(this.paiClusterConfig.token) {
                     this.paiToken = this.paiClusterConfig.token;
                 }
+                this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
                 break;
 
             case TrialConfigMetadataKey.TRIAL_CONFIG:
@@ -257,7 +258,7 @@ class PAIK8STrainingService extends PAITrainingService {
         // Step 3. Submit PAI job via Rest call
         // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
         const submitJobRequest: request.Options = {
-            uri: `http://${this.paiClusterConfig.host}/rest-server/api/v2/jobs`,
+            uri: `${this.paiClusterConfig.host}/rest-server/api/v2/jobs`,
             method: 'POST',
             body: paiJobConfig,
             headers: {
diff --git a/src/nni_manager/training_service/pai/paiTrainingService.ts b/src/nni_manager/training_service/pai/paiTrainingService.ts
index 0edbc66966..5709d4678d 100644
--- a/src/nni_manager/training_service/pai/paiTrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiTrainingService.ts
@@ -165,7 +165,7 @@ abstract class PAITrainingService implements TrainingService {
         }
 
         const stopJobRequest: request.Options = {
-            uri: `http://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}\
+            uri: `${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}\
 /jobs/${trialJobDetail.paiJobName}/executionType`, 
             method: 'PUT',
             json: true,
@@ -216,6 +216,16 @@ abstract class PAITrainingService implements TrainingService {
         return this.metricsEmitter;
     }
 
+    protected formatPAIHost(host: string): string {
+        // If the user's host already starts with 'http://' or 'https://', use it as-is;
+        // otherwise prepend the default scheme, i.e. format it as 'http://${host}'.
+        if (host.startsWith('http://') || host.startsWith('https://')) {
+            return host;
+        } else {
+            return `http://${host}`;
+        }
+    }
+
     protected async statusCheckingLoop(): Promise<void> {
         while (!this.stopping) {
             if(this.paiClusterConfig && this.paiClusterConfig.passWord) {
@@ -259,7 +269,7 @@ abstract class PAITrainingService implements TrainingService {
         }
 
         const authenticationReq: request.Options = {
-            uri: `http://${this.paiClusterConfig.host}/rest-server/api/v1/token`,
+            uri: `${this.paiClusterConfig.host}/rest-server/api/v1/token`,
             method: 'POST',
             json: true,
             body: {
diff --git a/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts b/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
index 2106cf145f..6b6f905b72 100644
--- a/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
@@ -107,7 +107,7 @@ class PAIYarnTrainingService extends PAITrainingService {
                 } else {
                     throw new Error('pai cluster config format error, please set password or token!');
                 }
-
+                this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
                 break;
 
             case TrialConfigMetadataKey.TRIAL_CONFIG:
@@ -272,7 +272,7 @@ class PAIYarnTrainingService extends PAITrainingService {
         // Step 3. Submit PAI job via Rest call
         // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
         const submitJobRequest: request.Options = {
-            uri: `http://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}/jobs`,
+            uri: `${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}/jobs`,
             method: 'POST',
             json: true,
             body: paiJobConfig,

From 9f40659da07018ca22ab27c5c97726a2cd188852 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <scottyugochang@gmail.com>
Date: Fri, 27 Dec 2019 09:15:23 +0800
Subject: [PATCH 02/23] Fix a few issues related to fixed arc and from-tuner
 arc (#1876)

---
 .../nni/nas/pytorch/classic_nas/mutator.py    | 30 +++++++++++--------
 src/sdk/pynni/nni/nas/pytorch/fixed.py        | 17 ++++-------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py b/src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
index 76f15c7646..e1a0b390f6 100644
--- a/src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
@@ -10,7 +10,7 @@
 
 import nni
 from nni.env_vars import trial_env_vars
-from nni.nas.pytorch.mutables import LayerChoice, InputChoice
+from nni.nas.pytorch.mutables import LayerChoice, InputChoice, MutableScope
 from nni.nas.pytorch.mutator import Mutator
 
 logger = logging.getLogger(__name__)
@@ -104,10 +104,11 @@ def _sample_input_choice(self, mutable, idx, value, search_space_item):
         search_space_item : list
             The list for corresponding search space.
         """
+        candidate_repr = search_space_item["candidates"]
         multihot_list = [False] * mutable.n_candidates
         for i, v in zip(idx, value):
-            assert 0 <= i < mutable.n_candidates and search_space_item[i] == v, \
-                "Index '{}' in search space '{}' is not '{}'".format(i, search_space_item, v)
+            assert 0 <= i < mutable.n_candidates and candidate_repr[i] == v, \
+                "Index '{}' in search space '{}' is not '{}'".format(i, candidate_repr, v)
             assert not multihot_list[i], "'{}' is selected twice in '{}', which is not allowed.".format(i, idx)
             multihot_list[i] = True
         return torch.tensor(multihot_list, dtype=torch.bool)  # pylint: disable=not-callable
@@ -121,17 +122,20 @@ def sample_final(self):
                                                                                        self._chosen_arch.keys())
         result = dict()
         for mutable in self.mutables:
-            assert mutable.key in self._chosen_arch, "Expected '{}' in chosen arch, but not found.".format(mutable.key)
-            data = self._chosen_arch[mutable.key]
-            assert isinstance(data, dict) and "_value" in data and "_idx" in data, \
-                "'{}' is not a valid choice.".format(data)
-            value = data["_value"]
-            idx = data["_idx"]
-            search_space_item = self._search_space[mutable.key]["_value"]
+            if isinstance(mutable, (LayerChoice, InputChoice)):
+                assert mutable.key in self._chosen_arch, \
+                    "Expected '{}' in chosen arch, but not found.".format(mutable.key)
+                data = self._chosen_arch[mutable.key]
+                assert isinstance(data, dict) and "_value" in data and "_idx" in data, \
+                    "'{}' is not a valid choice.".format(data)
             if isinstance(mutable, LayerChoice):
-                result[mutable.key] = self._sample_layer_choice(mutable, idx, value, search_space_item)
+                result[mutable.key] = self._sample_layer_choice(mutable, data["_idx"], data["_value"],
+                                                                self._search_space[mutable.key]["_value"])
             elif isinstance(mutable, InputChoice):
-                result[mutable.key] = self._sample_input_choice(mutable, idx, value, search_space_item)
+                result[mutable.key] = self._sample_input_choice(mutable, data["_idx"], data["_value"],
+                                                                self._search_space[mutable.key]["_value"])
+            elif isinstance(mutable, MutableScope):
+                logger.info("Mutable scope '%s' is skipped during parsing choices.", mutable.key)
             else:
                 raise TypeError("Unsupported mutable type: '%s'." % type(mutable))
         return result
@@ -190,6 +194,8 @@ def _generate_search_space(self):
                 search_space[key] = {"_type": INPUT_CHOICE,
                                      "_value": {"candidates": mutable.choose_from,
                                                 "n_chosen": mutable.n_chosen}}
+            elif isinstance(mutable, MutableScope):
+                logger.info("Mutable scope '%s' is skipped during generating search space.", mutable.key)
             else:
                 raise TypeError("Unsupported mutable type: '%s'." % type(mutable))
         return search_space
diff --git a/src/sdk/pynni/nni/nas/pytorch/fixed.py b/src/sdk/pynni/nni/nas/pytorch/fixed.py
index 6840097579..593a60ae44 100644
--- a/src/sdk/pynni/nni/nas/pytorch/fixed.py
+++ b/src/sdk/pynni/nni/nas/pytorch/fixed.py
@@ -41,18 +41,18 @@ def sample_final(self):
         return self._fixed_arc
 
 
-def _encode_tensor(data, device):
+def _encode_tensor(data):
     if isinstance(data, list):
         if all(map(lambda o: isinstance(o, bool), data)):
-            return torch.tensor(data, dtype=torch.bool, device=device)  # pylint: disable=not-callable
+            return torch.tensor(data, dtype=torch.bool)  # pylint: disable=not-callable
         else:
-            return torch.tensor(data, dtype=torch.float, device=device)  # pylint: disable=not-callable
+            return torch.tensor(data, dtype=torch.float)  # pylint: disable=not-callable
     if isinstance(data, dict):
-        return {k: _encode_tensor(v, device) for k, v in data.items()}
+        return {k: _encode_tensor(v) for k, v in data.items()}
     return data
 
 
-def apply_fixed_architecture(model, fixed_arc_path, device=None):
+def apply_fixed_architecture(model, fixed_arc_path):
     """
     Load architecture from `fixed_arc_path` and apply to model.
 
@@ -62,21 +62,16 @@ def apply_fixed_architecture(model, fixed_arc_path, device=None):
         Model with mutables.
     fixed_arc_path : str
         Path to the JSON that stores the architecture.
-    device : torch.device
-        Architecture weights will be transfered to `device`.
 
     Returns
     -------
     FixedArchitecture
     """
 
-    if device is None:
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     if isinstance(fixed_arc_path, str):
         with open(fixed_arc_path, "r") as f:
             fixed_arc = json.load(f)
-    fixed_arc = _encode_tensor(fixed_arc, device)
+    fixed_arc = _encode_tensor(fixed_arc)
     architecture = FixedArchitecture(model, fixed_arc)
-    architecture.to(device)
     architecture.reset()
     return architecture

From c5c0e72a35e47e88e38df608cfa0cf21510f8cd4 Mon Sep 17 00:00:00 2001
From: Cjkkkk <656569648@qq.com>
Date: Fri, 27 Dec 2019 11:21:01 +0800
Subject: [PATCH 03/23] export for default (#1883)

---
 src/sdk/pynni/nni/compression/torch/builtin_quantizers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sdk/pynni/nni/compression/torch/builtin_quantizers.py b/src/sdk/pynni/nni/compression/torch/builtin_quantizers.py
index 2204428574..09f88ee40e 100644
--- a/src/sdk/pynni/nni/compression/torch/builtin_quantizers.py
+++ b/src/sdk/pynni/nni/compression/torch/builtin_quantizers.py
@@ -5,7 +5,7 @@
 import torch
 from .compressor import Quantizer, QuantGrad, QuantType
 
-__all__ = ['NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer']
+__all__ = ['NaiveQuantizer', 'QAT_Quantizer', 'DoReFaQuantizer', 'BNNQuantizer']
 
 logger = logging.getLogger(__name__)
 

From 9b49245e6ebdfdc6d4ade82c6f9f2333afab1c0d Mon Sep 17 00:00:00 2001
From: Cjkkkk <656569648@qq.com>
Date: Sun, 29 Dec 2019 14:13:02 +0800
Subject: [PATCH 04/23] Doc for quantization (#1881)

---
 docs/en_US/Compressor/Overview.md  | 102 ++++++++++++++++++++++++-----
 docs/en_US/Compressor/Quantizer.md |  65 ++++--------------
 2 files changed, 99 insertions(+), 68 deletions(-)

diff --git a/docs/en_US/Compressor/Overview.md b/docs/en_US/Compressor/Overview.md
index f277de5c0f..b8e2903afb 100644
--- a/docs/en_US/Compressor/Overview.md
+++ b/docs/en_US/Compressor/Overview.md
@@ -1,8 +1,11 @@
 # Compressor
+As larger neural networks with more layers and nodes are considered, reducing their storage and computational cost becomes critical, especially for some real-time applications. Model compression can be used to address this problem. 
 
 We are glad to announce the alpha release for model compression toolkit on top of NNI, it's still in the experiment phase which might evolve based on usage feedback. We'd like to invite you to use, feedback and even contribute.
 
-NNI provides an easy-to-use toolkit to help user design and use compression algorithms. It supports Tensorflow and PyTorch with unified interface. For users to compress their models, they only need to add several lines in their code. There are some popular model compression algorithms built-in in NNI. Users could further use NNI's auto tuning power to find the best compressed model, which is detailed in [Auto Model Compression](./AutoCompression.md). On the other hand, users could easily customize their new compression algorithms using NNI's interface, refer to the tutorial [here](#customize-new-compression-algorithms).
+NNI provides an easy-to-use toolkit to help users design and use compression algorithms. It currently supports PyTorch with a unified interface. To compress their models, users only need to add several lines to their code. Some popular model compression algorithms are built into NNI. Users can further use NNI's auto-tuning power to find the best compressed model, which is detailed in [Auto Model Compression](./AutoCompression.md). In addition, users can easily customize their own compression algorithms using NNI's interface; refer to the tutorial [here](#customize-new-compression-algorithms).
+
+For a survey of model compression, you can refer to this paper: [Recent Advances in Efficient Computation of Deep Convolutional Neural Networks](https://arxiv.org/pdf/1802.00939.pdf).
 
 ## Supported algorithms
 
@@ -10,6 +13,8 @@ We have provided several compression algorithms, including several pruning and q
 
 **Pruning**
 
+Pruning algorithms compress the original network by removing redundant weights or channels of layers, which can reduce model complexity and address the over-fitting issue.
+
 |Name|Brief Introduction of Algorithm|
 |---|---|
 | [Level Pruner](./Pruner.md#level-pruner) | Pruning the specified ratio on each weight based on absolute values of weights |
@@ -25,11 +30,14 @@ We have provided several compression algorithms, including several pruning and q
 
 **Quantization**
 
+Quantization algorithms compress the original network by reducing the number of bits required to represent weights or activations, which can reduce the computations and the inference time.
+
 |Name|Brief Introduction of Algorithm|
 |---|---|
 | [Naive Quantizer](./Quantizer.md#naive-quantizer) |  Quantize weights to default 8 bits |
 | [QAT Quantizer](./Quantizer.md#qat-quantizer) | Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference. [Reference Paper](http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf)|
 | [DoReFa Quantizer](./Quantizer.md#dorefa-quantizer) | DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients. [Reference Paper](https://arxiv.org/abs/1606.06160)|
+| [BNN Quantizer](./Quantizer.md#bnn-quantizer) | Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1. [Reference Paper](https://arxiv.org/abs/1602.02830)|
 
 ## Usage of built-in compression algorithms
 
@@ -61,17 +69,47 @@ The function call `pruner.compress()` modifies user defined model (in Tensorflow
 When instantiate a compression algorithm, there is `config_list` passed in. We describe how to write this config below.
 
 ### User configuration for a compression algorithm
+When compressing a model, users may want to specify the ratio for sparsity, to specify different ratios for different types of operations, to exclude certain types of operations, or to compress only a certain types of operations. For users to express these kinds of requirements, we define a configuration specification. It can be seen as a python `list` object, where each element is a `dict` object. 
+
+The `dict`s in the `list` are applied one by one, that is, the configurations in latter `dict` will overwrite the configurations in former ones on the operations that are within the scope of both of them. 
 
-When compressing a model, users may want to specify the ratio for sparsity, to specify different ratios for different types of operations, to exclude certain types of operations, or to compress only a certain types of operations. For users to express these kinds of requirements, we define a configuration specification. It can be seen as a python `list` object, where each element is a `dict` object. In each `dict`, there are some keys commonly supported by NNI compression:
+#### Common keys
+In each `dict`, there are some keys commonly supported by NNI compression:
 
 * __op_types__: This is to specify what types of operations to be compressed. 'default' means following the algorithm's default setting.
 * __op_names__: This is to specify by name what operations to be compressed. If this field is omitted, operations will not be filtered by it.
 * __exclude__: Default is False. If this field is True, it means the operations with specified types and names will be excluded from the compression.
 
-There are also other keys in the `dict`, but they are specific for every compression algorithm. For example, some , some.
+#### Keys for quantization algorithms
+**If you use quantization algorithms, you need to specify more keys. If you use pruning algorithms, you can safely skip these keys**
 
-The `dict`s in the `list` are applied one by one, that is, the configurations in latter `dict` will overwrite the configurations in former ones on the operations that are within the scope of both of them. 
+* __quant_types__ : list of string. 
+
+The types of quantization to apply; currently 'weight', 'input' and 'output' are supported. 'weight' means applying the quantization operation
+to the weight parameters of modules. 'input' means applying the quantization operation to the input of the module's forward method. 'output' means applying the quantization operation to the output of the module's forward method, which is often called 'activation' in some papers.
+
+* __quant_bits__ : int or dict of {str : int}
 
+Bit length of quantization. When a dict is given, the key is the quantization type and the value is the bit length for that type, e.g.
+```
+{
+    'quant_bits': {
+        'weight': 8,
+        'output': 4,
+        },
+}
+```
+When the value is an int, all quantization types share the same bit length, e.g.
+```
+{
+    'quant_bits': 8, # all quantization types use 8 bits
+}
+```
+#### Other keys specified for every compression algorithm
+There are also other keys in the `dict` that are specific to each compression algorithm. For example, [Level Pruner](./Pruner.md#level-pruner) requires the `sparsity` key to specify how much of a model should be pruned.
+
+
+#### example
 A simple example of configuration is shown below:
 
 ```python
@@ -183,11 +221,9 @@ Some algorithms may want global information for generating masks, for example, a
 The interface for customizing quantization algorithm is similar to that of pruning algorithms. The only difference is that `calc_mask` is replaced with `quantize_weight`. `quantize_weight` directly returns the quantized weights rather than mask, because for quantization the quantized weights cannot be obtained by applying mask.
 
 ```python
-# This is writing a Quantizer in tensorflow.
-# For writing a Quantizer in PyTorch, you can simply replace
-# nni.compression.tensorflow.Quantizer with
-# nni.compression.torch.Quantizer
-class YourQuantizer(nni.compression.tensorflow.Quantizer):
+from nni.compression.torch.compressor import Quantizer
+
+class YourQuantizer(Quantizer):
     def __init__(self, model, config_list):
         """
         Suggest you to use the NNI defined spec for config
@@ -245,19 +281,55 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
 
         return new_input
 
-    # note for pytorch version, there is no sess in input arguments
-    def update_epoch(self, epoch_num, sess):
+    def update_epoch(self, epoch_num):
         pass
 
-    # note for pytorch version, there is no sess in input arguments
-    def step(self, sess):
+    def step(self):
         """
         Can do some processing based on the model or weights binded
         in the func bind_model
         """
         pass
 ```
+#### Customize backward function
+Sometimes it's necessary for a quantization operation to have a customized backward function, such as a [Straight-Through Estimator](https://stackoverflow.com/questions/38361314/the-concept-of-straight-through-estimator-ste). Users can customize a backward function as follows:
+
+```python
+from nni.compression.torch.compressor import Quantizer, QuantGrad, QuantType
+
+class ClipGrad(QuantGrad):
+    @staticmethod
+    def quant_backward(tensor, grad_output, quant_type):
+        """
+        This method should be overridden by a subclass to provide a customized backward function;
+        the default implementation is the Straight-Through Estimator
+        Parameters
+        ----------
+        tensor : Tensor
+            input of quantization operation
+        grad_output : Tensor
+            gradient of the output of quantization operation
+        quant_type : QuantType
+            the type of quantization, it can be `QuantType.QUANT_INPUT`, `QuantType.QUANT_WEIGHT`, `QuantType.QUANT_OUTPUT`,
+            you can define different behavior for different types.
+        Returns
+        -------
+        tensor
+            gradient of the input of quantization operation
+        """
+
+        # for quant_output function, set grad to zero if the absolute value of tensor is larger than 1
+        if quant_type == QuantType.QUANT_OUTPUT: 
+            grad_output[torch.abs(tensor) > 1] = 0
+        return grad_output
+
 
-### Usage of user customized compression algorithm
+class YourQuantizer(Quantizer):
+    def __init__(self, model, config_list):
+        super().__init__(model, config_list)
+        # set your customized backward function to overwrite default backward function
+        self.quant_grad = ClipGrad
+
+```
 
-__[TODO]__ ...
+If you do not customize `QuantGrad`, the default backward is Straight-Through Estimator. 
\ No newline at end of file
diff --git a/docs/en_US/Compressor/Quantizer.md b/docs/en_US/Compressor/Quantizer.md
index 67791117e1..3308f25c1b 100644
--- a/docs/en_US/Compressor/Quantizer.md
+++ b/docs/en_US/Compressor/Quantizer.md
@@ -6,12 +6,10 @@ We provide Naive Quantizer to quantizer weight to default 8 bits, you can use it
 
 ### Usage
 tensorflow
-```python
-nni.compressors.tensorflow.NaiveQuantizer(model_graph).compress()
+```python nni.compression.tensorflow.NaiveQuantizer(model_graph).compress()
 ```
 pytorch
-```python
-nni.compressors.torch.NaiveQuantizer(model).compress()
+```python nni.compression.torch.NaiveQuantizer(model).compress()
 ```
 
 ***
@@ -29,7 +27,7 @@ You can quantize your model to 8 bits with the code below before your training c
 
 PyTorch code
 ```python
-from nni.compressors.torch import QAT_Quantizer
+from nni.compression.torch import QAT_Quantizer
 model = Mnist()
 
 config_list = [{
@@ -51,22 +49,9 @@ quantizer.compress()
 You can view example for more information
 
 #### User configuration for QAT Quantizer
-* **quant_types:** : list of string
-
-type of quantization you want to apply, currently support 'weight', 'input', 'output'.
-
-* **op_types:** list of string
-
-specify the type of modules that will be quantized. eg. 'Conv2D'
-
-* **op_names:** list of string
+Common configuration needed by compression algorithms can be found at: [Common configuration](./Overview.md#user-configuration-for-a-compression-algorithm)
 
-specify the name of modules that will be quantized. eg. 'conv1'
-
-* **quant_bits:** int or dict of {str : int}
-
-bits length of quantization, key is the quantization type, value is the length, eg. {'weight': 8},
-when the type is int, all quantization types share same bits length.
+configuration needed by this algorithm :
 
 * **quant_start_step:** int
 
@@ -85,7 +70,7 @@ To implement DoReFa Quantizer, you can add code below before your training code
 
 PyTorch code
 ```python
-from nni.compressors.torch import DoReFaQuantizer
+from nni.compression.torch import DoReFaQuantizer
 config_list = [{ 
     'quant_types': ['weight'],
     'quant_bits': 8, 
@@ -98,22 +83,9 @@ quantizer.compress()
 You can view example for more information
 
 #### User configuration for DoReFa Quantizer
-* **quant_types:** : list of string
-
-type of quantization you want to apply, currently support 'weight', 'input', 'output'.
-
-* **op_types:** list of string
-
-specify the type of modules that will be quantized. eg. 'Conv2D'
-
-* **op_names:** list of string
+Common configuration needed by compression algorithms can be found at: [Common configuration](./Overview.md#user-configuration-for-a-compression-algorithm)
 
-specify the name of modules that will be quantized. eg. 'conv1'
-
-* **quant_bits:** int or dict of {str : int}
-
-bits length of quantization, key is the quantization type, value is the length, eg. {'weight': 8},
-when the type is int, all quantization types share same bits length.
+configuration needed by this algorithm :
 
 
 ## BNN Quantizer
@@ -130,13 +102,13 @@ from nni.compression.torch import BNNQuantizer
 model = VGG_Cifar10(num_classes=10)
 
 configure_list = [{
-    'quant_types': ['weight'],
     'quant_bits': 1,
+    'quant_types': ['weight'],
     'op_types': ['Conv2d', 'Linear'],
     'op_names': ['features.0', 'features.3', 'features.7', 'features.10', 'features.14', 'features.17', 'classifier.0', 'classifier.3']
 }, {
-    'quant_types': ['output'],
     'quant_bits': 1,
+    'quant_types': ['output'],
     'op_types': ['Hardtanh'],
     'op_names': ['features.6', 'features.9', 'features.13', 'features.16', 'features.20', 'classifier.2', 'classifier.5']
 }]
@@ -148,22 +120,9 @@ model = quantizer.compress()
 You can view example [examples/model_compress/BNN_quantizer_cifar10.py]( https://github.com/microsoft/nni/tree/master/examples/model_compress/BNN_quantizer_cifar10.py) for more information.
 
 #### User configuration for BNN Quantizer
-* **quant_types:** : list of string
-
-type of quantization you want to apply, currently support 'weight', 'input', 'output'.
-
-* **op_types:** list of string
-
-specify the type of modules that will be quantized. eg. 'Conv2D'
-
-* **op_names:** list of string
-
-specify the name of modules that will be quantized. eg. 'conv1'
-
-* **quant_bits:** int or dict of {str : int}
+Common configuration needed by compression algorithms can be found at: [Common configuration](./Overview.md#user-configuration-for-a-compression-algorithm)
 
-bits length of quantization, key is the quantization type, value is the length, eg. {'weight': 8},
-when the type is int, all quantization types share same bits length.
+configuration needed by this algorithm :
 
 ### Experiment
 We implemented one of the experiments in [Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1](https://arxiv.org/abs/1602.02830), we quantized the **VGGNet** for CIFAR-10 in the paper. Our experiments results are as follows:

From 06db4729b00bd2d6f5582fe8528014ffe106953a Mon Sep 17 00:00:00 2001
From: QuanluZhang <Quanlu.Zhang@microsoft.com>
Date: Mon, 30 Dec 2019 07:40:22 +0800
Subject: [PATCH 05/23] refactor code structure of pruning algorithms (#1882)

---
 .../pynni/nni/compression/torch/__init__.py   |   7 +-
 .../torch/activation_rank_filter_pruners.py   | 252 ++++++
 .../nni/compression/torch/builtin_pruners.py  | 741 ------------------
 .../nni/compression/torch/lottery_ticket.py   | 150 ----
 .../pynni/nni/compression/torch/pruners.py    | 383 +++++++++
 .../{builtin_quantizers.py => quantizers.py}  |   0
 .../torch/weight_rank_filter_pruners.py       | 262 +++++++
 7 files changed, 901 insertions(+), 894 deletions(-)
 create mode 100644 src/sdk/pynni/nni/compression/torch/activation_rank_filter_pruners.py
 delete mode 100644 src/sdk/pynni/nni/compression/torch/builtin_pruners.py
 delete mode 100644 src/sdk/pynni/nni/compression/torch/lottery_ticket.py
 create mode 100644 src/sdk/pynni/nni/compression/torch/pruners.py
 rename src/sdk/pynni/nni/compression/torch/{builtin_quantizers.py => quantizers.py} (100%)
 create mode 100644 src/sdk/pynni/nni/compression/torch/weight_rank_filter_pruners.py

diff --git a/src/sdk/pynni/nni/compression/torch/__init__.py b/src/sdk/pynni/nni/compression/torch/__init__.py
index 1aa652875a..d79a8f76c4 100644
--- a/src/sdk/pynni/nni/compression/torch/__init__.py
+++ b/src/sdk/pynni/nni/compression/torch/__init__.py
@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 
 from .compressor import LayerInfo, Compressor, Pruner, Quantizer
-from .builtin_pruners import *
-from .builtin_quantizers import *
-from .lottery_ticket import LotteryTicketPruner
+from .pruners import *
+from .weight_rank_filter_pruners import *
+from .activation_rank_filter_pruners import *
+from .quantizers import *
diff --git a/src/sdk/pynni/nni/compression/torch/activation_rank_filter_pruners.py b/src/sdk/pynni/nni/compression/torch/activation_rank_filter_pruners.py
new file mode 100644
index 0000000000..d9c811531a
--- /dev/null
+++ b/src/sdk/pynni/nni/compression/torch/activation_rank_filter_pruners.py
@@ -0,0 +1,252 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+import torch
+from .compressor import Pruner
+
+__all__ = ['ActivationAPoZRankFilterPruner', 'ActivationMeanRankFilterPruner']
+
+logger = logging.getLogger('torch activation rank filter pruners')
+
+class ActivationRankFilterPruner(Pruner):
+    """
+    A structured pruning base class that prunes the filters with the smallest
+    importance criterion in convolution layers (using activation values)
+    to achieve a preset level of network sparsity.
+    """
+
+    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
+        """
+        Parameters
+        ----------
+        model : torch.nn.Module
+            Model to be pruned
+        config_list : list
+            support key for each list item:
+                - sparsity: percentage of convolutional filters to be pruned.
+        activation : str
+            Activation function
+        statistics_batch_num : int
+            Num of batches for activation statistics
+        """
+
+        super().__init__(model, config_list)
+        self.mask_calculated_ops = set()
+        self.statistics_batch_num = statistics_batch_num
+        self.collected_activation = {}
+        self.hooks = {}
+        assert activation in ['relu', 'relu6']
+        if activation == 'relu':
+            self.activation = torch.nn.functional.relu
+        elif activation == 'relu6':
+            self.activation = torch.nn.functional.relu6
+        else:
+            self.activation = None
+
+    def compress(self):
+        """
+        Compress the model, register a hook for collecting activations.
+        """
+        modules_to_compress = self.detect_modules_to_compress()
+        for layer, config in modules_to_compress:
+            self._instrument_layer(layer, config)
+            self.collected_activation[layer.name] = []
+
+            def _hook(module_, input_, output, name=layer.name):
+                if len(self.collected_activation[name]) < self.statistics_batch_num:
+                    self.collected_activation[name].append(self.activation(output.detach().cpu()))
+
+            layer.module.register_forward_hook(_hook)
+        return self.bound_model
+
+    def get_mask(self, base_mask, activations, num_prune):
+        raise NotImplementedError('{} get_mask is not implemented'.format(self.__class__.__name__))
+
+    def calc_mask(self, layer, config):
+        """
+        Calculate the mask of given layer.
+        Filters with the smallest importance criterion which is calculated from the activation are masked.
+
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the compression operation
+        config : dict
+            layer's pruning config
+
+        Returns
+        -------
+        dict
+            dictionary for storing masks
+        """
+
+        weight = layer.module.weight.data
+        op_name = layer.name
+        op_type = layer.type
+        assert 0 <= config.get('sparsity') < 1, "sparsity must in the range [0, 1)"
+        assert op_type in ['Conv2d'], "only support Conv2d"
+        assert op_type in config.get('op_types')
+        if op_name in self.mask_calculated_ops:
+            assert op_name in self.mask_dict
+            return self.mask_dict.get(op_name)
+        mask_weight = torch.ones(weight.size()).type_as(weight).detach()
+        if hasattr(layer.module, 'bias') and layer.module.bias is not None:
+            mask_bias = torch.ones(layer.module.bias.size()).type_as(layer.module.bias).detach()
+        else:
+            mask_bias = None
+        mask = {'weight': mask_weight, 'bias': mask_bias}
+        try:
+            filters = weight.size(0)
+            num_prune = int(filters * config.get('sparsity'))
+            if filters < 2 or num_prune < 1 or len(self.collected_activation[layer.name]) < self.statistics_batch_num:
+                return mask
+            mask = self.get_mask(mask, self.collected_activation[layer.name], num_prune)
+        finally:
+            if len(self.collected_activation[layer.name]) == self.statistics_batch_num:
+                self.mask_dict.update({op_name: mask})
+                self.mask_calculated_ops.add(op_name)
+        return mask
+
+
+class ActivationAPoZRankFilterPruner(ActivationRankFilterPruner):
+    """
+    A structured pruning algorithm that prunes the filters with the
+    largest APoZ (average percentage of zeros) of output activations.
+    Hengyuan Hu, Rui Peng, Yu-Wing Tai and Chi-Keung Tang,
+    "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures", ICLR 2016.
+    https://arxiv.org/abs/1607.03250
+    """
+
+    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
+        """
+        Parameters
+        ----------
+        model : torch.nn.Module
+            Model to be pruned
+        config_list : list
+            support key for each list item:
+                - sparsity: percentage of convolutional filters to be pruned.
+        activation : str
+            Activation function
+        statistics_batch_num : int
+            Num of batches for activation statistics
+        """
+        super().__init__(model, config_list, activation, statistics_batch_num)
+
+    def get_mask(self, base_mask, activations, num_prune):
+        """
+        Calculate the mask of given layer.
+        Filters with the largest APoZ (average percentage of zeros) of output activations are masked.
+
+        Parameters
+        ----------
+        base_mask : dict
+            The basic mask with the same shape of weight, all item in the basic mask is 1.
+        activations : list
+            Layer's output activations
+        num_prune : int
+            Num of filters to prune
+
+        Returns
+        -------
+        dict
+            dictionary for storing masks
+        """
+        apoz = self._calc_apoz(activations)
+        prune_indices = torch.argsort(apoz, descending=True)[:num_prune]
+        for idx in prune_indices:
+            base_mask['weight'][idx] = 0.
+            if base_mask['bias'] is not None:
+                base_mask['bias'][idx] = 0.
+        return base_mask
+
+    def _calc_apoz(self, activations):
+        """
+        Calculate APoZ(average percentage of zeros) of activations.
+
+        Parameters
+        ----------
+        activations : list
+            Layer's output activations
+
+        Returns
+        -------
+        torch.Tensor
+            Filter's APoZ(average percentage of zeros) of the activations
+        """
+        activations = torch.cat(activations, 0)
+        _eq_zero = torch.eq(activations, torch.zeros_like(activations))
+        _apoz = torch.sum(_eq_zero, dim=(0, 2, 3)) / torch.numel(_eq_zero[:, 0, :, :])
+        return _apoz
+
+
+class ActivationMeanRankFilterPruner(ActivationRankFilterPruner):
+    """
+    A structured pruning algorithm that prunes the filters with the
+    smallest mean value of output activations.
+    Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila and Jan Kautz,
+    "Pruning Convolutional Neural Networks for Resource Efficient Inference", ICLR 2017.
+    https://arxiv.org/abs/1611.06440
+    """
+
+    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
+        """
+        Parameters
+        ----------
+        model : torch.nn.Module
+            Model to be pruned
+        config_list : list
+            support key for each list item:
+                - sparsity: percentage of convolutional filters to be pruned.
+        activation : str
+            Activation function
+        statistics_batch_num : int
+            Num of batches for activation statistics
+        """
+        super().__init__(model, config_list, activation, statistics_batch_num)
+
+    def get_mask(self, base_mask, activations, num_prune):
+        """
+        Calculate the mask of given layer.
+        Filters with the smallest mean value of output activations are masked.
+
+        Parameters
+        ----------
+        base_mask : dict
+            The basic mask with the same shape of weight, all item in the basic mask is 1.
+        activations : list
+            Layer's output activations
+        num_prune : int
+            Num of filters to prune
+
+        Returns
+        -------
+        dict
+            dictionary for storing masks
+        """
+        mean_activation = self._cal_mean_activation(activations)
+        prune_indices = torch.argsort(mean_activation)[:num_prune]
+        for idx in prune_indices:
+            base_mask['weight'][idx] = 0.
+            if base_mask['bias'] is not None:
+                base_mask['bias'][idx] = 0.
+        return base_mask
+
+    def _cal_mean_activation(self, activations):
+        """
+        Calculate mean value of activations.
+
+        Parameters
+        ----------
+        activations : list
+            Layer's output activations
+
+        Returns
+        -------
+        torch.Tensor
+            Filter's mean value of the output activations
+        """
+        activations = torch.cat(activations, 0)
+        mean_activation = torch.mean(activations, dim=(0, 2, 3))
+        return mean_activation
diff --git a/src/sdk/pynni/nni/compression/torch/builtin_pruners.py b/src/sdk/pynni/nni/compression/torch/builtin_pruners.py
deleted file mode 100644
index 8e19ea394d..0000000000
--- a/src/sdk/pynni/nni/compression/torch/builtin_pruners.py
+++ /dev/null
@@ -1,741 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT license.
-
-import logging
-import torch
-from .compressor import Pruner
-
-__all__ = ['LevelPruner', 'AGP_Pruner', 'SlimPruner', 'L1FilterPruner', 'L2FilterPruner', 'FPGMPruner',
-           'ActivationAPoZRankFilterPruner', 'ActivationMeanRankFilterPruner']
-
-logger = logging.getLogger('torch pruner')
-
-
-class LevelPruner(Pruner):
-    """
-    Prune to an exact pruning level specification
-    """
-
-    def __init__(self, model, config_list):
-        """
-        Parameters
-        ----------
-        model : torch.nn.module
-            Model to be pruned
-        config_list : list
-            List on pruning configs
-        """
-
-        super().__init__(model, config_list)
-        self.mask_calculated_ops = set()
-
-    def calc_mask(self, layer, config):
-        """
-        Calculate the mask of given layer
-        Parameters
-        ----------
-        layer : LayerInfo
-            the layer to instrument the compression operation
-        config : dict
-            layer's pruning config
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-
-        weight = layer.module.weight.data
-        op_name = layer.name
-        if op_name not in self.mask_calculated_ops:
-            w_abs = weight.abs()
-            k = int(weight.numel() * config['sparsity'])
-            if k == 0:
-                return torch.ones(weight.shape).type_as(weight)
-            threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
-            mask_weight = torch.gt(w_abs, threshold).type_as(weight)
-            mask = {'weight': mask_weight}
-            self.mask_dict.update({op_name: mask})
-            self.mask_calculated_ops.add(op_name)
-        else:
-            assert op_name in self.mask_dict, "op_name not in the mask_dict"
-            mask = self.mask_dict[op_name]
-        return mask
-
-
-class AGP_Pruner(Pruner):
-    """
-    An automated gradual pruning algorithm that prunes the smallest magnitude
-    weights to achieve a preset level of network sparsity.
-    Michael Zhu and Suyog Gupta, "To prune, or not to prune: exploring the
-    efficacy of pruning for model compression", 2017 NIPS Workshop on Machine
-    Learning of Phones and other Consumer Devices,
-    https://arxiv.org/pdf/1710.01878.pdf
-    """
-
-    def __init__(self, model, config_list):
-        """
-        Parameters
-        ----------
-        model : torch.nn.module
-            Model to be pruned
-        config_list : list
-            List on pruning configs
-        """
-
-        super().__init__(model, config_list)
-        self.now_epoch = 0
-        self.if_init_list = {}
-
-    def calc_mask(self, layer, config):
-        """
-        Calculate the mask of given layer
-        Parameters
-        ----------
-        layer : LayerInfo
-            the layer to instrument the compression operation
-        config : dict
-            layer's pruning config
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-
-        weight = layer.module.weight.data
-        op_name = layer.name
-        start_epoch = config.get('start_epoch', 0)
-        freq = config.get('frequency', 1)
-        if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) \
-                and (self.now_epoch - start_epoch) % freq == 0:
-            mask = self.mask_dict.get(op_name, {'weight': torch.ones(weight.shape).type_as(weight)})
-            target_sparsity = self.compute_target_sparsity(config)
-            k = int(weight.numel() * target_sparsity)
-            if k == 0 or target_sparsity >= 1 or target_sparsity <= 0:
-                return mask
-            # if we want to generate new mask, we should update weigth first
-            w_abs = weight.abs() * mask
-            threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
-            new_mask = {'weight': torch.gt(w_abs, threshold).type_as(weight)}
-            self.mask_dict.update({op_name: new_mask})
-            self.if_init_list.update({op_name: False})
-        else:
-            new_mask = self.mask_dict.get(op_name, {'weight': torch.ones(weight.shape).type_as(weight)})
-        return new_mask
-
-    def compute_target_sparsity(self, config):
-        """
-        Calculate the sparsity for pruning
-        Parameters
-        ----------
-        config : dict
-            Layer's pruning config
-        Returns
-        -------
-        float
-            Target sparsity to be pruned
-        """
-
-        end_epoch = config.get('end_epoch', 1)
-        start_epoch = config.get('start_epoch', 0)
-        freq = config.get('frequency', 1)
-        final_sparsity = config.get('final_sparsity', 0)
-        initial_sparsity = config.get('initial_sparsity', 0)
-        if end_epoch <= start_epoch or initial_sparsity >= final_sparsity:
-            logger.warning('your end epoch <= start epoch or initial_sparsity >= final_sparsity')
-            return final_sparsity
-
-        if end_epoch <= self.now_epoch:
-            return final_sparsity
-
-        span = ((end_epoch - start_epoch - 1) // freq) * freq
-        assert span > 0
-        target_sparsity = (final_sparsity +
-                           (initial_sparsity - final_sparsity) *
-                           (1.0 - ((self.now_epoch - start_epoch) / span)) ** 3)
-        return target_sparsity
-
-    def update_epoch(self, epoch):
-        """
-        Update epoch
-        Parameters
-        ----------
-        epoch : int
-            current training epoch
-        """
-
-        if epoch > 0:
-            self.now_epoch = epoch
-            for k in self.if_init_list.keys():
-                self.if_init_list[k] = True
-
-
-class SlimPruner(Pruner):
-    """
-    A structured pruning algorithm that prunes channels by pruning the weights of BN layers.
-    Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang
-    "Learning Efficient Convolutional Networks through Network Slimming", 2017 ICCV
-    https://arxiv.org/pdf/1708.06519.pdf
-    """
-
-    def __init__(self, model, config_list):
-        """
-        Parameters
-        ----------
-        config_list : list
-            support key for each list item:
-                - sparsity: percentage of convolutional filters to be pruned.
-        """
-
-        super().__init__(model, config_list)
-        self.mask_calculated_ops = set()
-        weight_list = []
-        if len(config_list) > 1:
-            logger.warning('Slim pruner only supports 1 configuration')
-        config = config_list[0]
-        for (layer, config) in self.detect_modules_to_compress():
-            assert layer.type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning'
-            weight_list.append(layer.module.weight.data.abs().clone())
-        all_bn_weights = torch.cat(weight_list)
-        k = int(all_bn_weights.shape[0] * config['sparsity'])
-        self.global_threshold = torch.topk(all_bn_weights.view(-1), k, largest=False)[0].max()
-
-    def calc_mask(self, layer, config):
-        """
-        Calculate the mask of given layer.
-        Scale factors with the smallest absolute value in the BN layer are masked.
-        Parameters
-        ----------
-        layer : LayerInfo
-            the layer to instrument the compression operation
-        config : dict
-            layer's pruning config
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-
-        weight = layer.module.weight.data
-        op_name = layer.name
-        op_type = layer.type
-        assert op_type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning'
-        if op_name in self.mask_calculated_ops:
-            assert op_name in self.mask_dict
-            return self.mask_dict.get(op_name)
-        base_mask = torch.ones(weight.size()).type_as(weight).detach()
-        mask = {'weight': base_mask.detach(), 'bias': base_mask.clone().detach()}
-        try:
-            filters = weight.size(0)
-            num_prune = int(filters * config.get('sparsity'))
-            if filters < 2 or num_prune < 1:
-                return mask
-            w_abs = weight.abs()
-            mask_weight = torch.gt(w_abs, self.global_threshold).type_as(weight)
-            mask_bias = mask_weight.clone()
-            mask = {'weight': mask_weight.detach(), 'bias': mask_bias.detach()}
-        finally:
-            self.mask_dict.update({layer.name: mask})
-            self.mask_calculated_ops.add(layer.name)
-
-        return mask
-
-
-class WeightRankFilterPruner(Pruner):
-    """
-    A structured pruning base class that prunes the filters with the smallest
-    importance criterion in convolution layers to achieve a preset level of network sparsity.
-    """
-
-    def __init__(self, model, config_list):
-        """
-        Parameters
-        ----------
-        model : torch.nn.module
-            Model to be pruned
-        config_list : list
-            support key for each list item:
-                - sparsity: percentage of convolutional filters to be pruned.
-        """
-
-        super().__init__(model, config_list)
-        self.mask_calculated_ops = set()  # operations whose mask has been calculated
-
-    def _get_mask(self, base_mask, weight, num_prune):
-        return {'weight': None, 'bias': None}
-
-    def calc_mask(self, layer, config):
-        """
-        Calculate the mask of given layer.
-        Filters with the smallest importance criterion of the kernel weights are masked.
-        Parameters
-        ----------
-        layer : LayerInfo
-            the layer to instrument the compression operation
-        config : dict
-            layer's pruning config
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-
-        weight = layer.module.weight.data
-        op_name = layer.name
-        op_type = layer.type
-        assert 0 <= config.get('sparsity') < 1, "sparsity must in the range [0, 1)"
-        assert op_type in ['Conv1d', 'Conv2d'], "only support Conv1d and Conv2d"
-        assert op_type in config.get('op_types')
-        if op_name in self.mask_calculated_ops:
-            assert op_name in self.mask_dict
-            return self.mask_dict.get(op_name)
-        mask_weight = torch.ones(weight.size()).type_as(weight).detach()
-        if hasattr(layer.module, 'bias') and layer.module.bias is not None:
-            mask_bias = torch.ones(layer.module.bias.size()).type_as(layer.module.bias).detach()
-        else:
-            mask_bias = None
-        mask = {'weight': mask_weight, 'bias': mask_bias}
-        try:
-            filters = weight.size(0)
-            num_prune = int(filters * config.get('sparsity'))
-            if filters < 2 or num_prune < 1:
-                return mask
-            mask = self._get_mask(mask, weight, num_prune)
-        finally:
-            self.mask_dict.update({op_name: mask})
-            self.mask_calculated_ops.add(op_name)
-        return mask
-
-
-class L1FilterPruner(WeightRankFilterPruner):
-    """
-    A structured pruning algorithm that prunes the filters of smallest magnitude
-    weights sum in the convolution layers to achieve a preset level of network sparsity.
-    Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet and Hans Peter Graf,
-    "PRUNING FILTERS FOR EFFICIENT CONVNETS", 2017 ICLR
-    https://arxiv.org/abs/1608.08710
-    """
-
-    def __init__(self, model, config_list):
-        """
-        Parameters
-        ----------
-        model : torch.nn.module
-            Model to be pruned
-        config_list : list
-            support key for each list item:
-                - sparsity: percentage of convolutional filters to be pruned.
-        """
-
-        super().__init__(model, config_list)
-
-    def _get_mask(self, base_mask, weight, num_prune):
-        """
-        Calculate the mask of given layer.
-        Filters with the smallest sum of its absolute kernel weights are masked.
-        Parameters
-        ----------
-        base_mask : dict
-            The basic mask with the same shape of weight or bias, all item in the basic mask is 1.
-        weight : torch.Tensor
-            Layer's weight
-        num_prune : int
-            Num of filters to prune
-
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-
-        filters = weight.shape[0]
-        w_abs = weight.abs()
-        w_abs_structured = w_abs.view(filters, -1).sum(dim=1)
-        threshold = torch.topk(w_abs_structured.view(-1), num_prune, largest=False)[0].max()
-        mask_weight = torch.gt(w_abs_structured, threshold)[:, None, None, None].expand_as(weight).type_as(weight)
-        mask_bias = torch.gt(w_abs_structured, threshold).type_as(weight)
-
-        return {'weight': mask_weight.detach(), 'bias': mask_bias.detach()}
-
-
-class L2FilterPruner(WeightRankFilterPruner):
-    """
-    A structured pruning algorithm that prunes the filters with the
-    smallest L2 norm of the weights.
-    """
-
-    def __init__(self, model, config_list):
-        """
-        Parameters
-        ----------
-        model : torch.nn.module
-            Model to be pruned
-        config_list : list
-            support key for each list item:
-                - sparsity: percentage of convolutional filters to be pruned.
-        """
-
-        super().__init__(model, config_list)
-
-    def _get_mask(self, base_mask, weight, num_prune):
-        """
-        Calculate the mask of given layer.
-        Filters with the smallest L2 norm of the absolute kernel weights are masked.
-        Parameters
-        ----------
-        base_mask : dict
-            The basic mask with the same shape of weight or bias, all item in the basic mask is 1.
-        weight : torch.Tensor
-            Layer's weight
-        num_prune : int
-            Num of filters to prune
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-        filters = weight.shape[0]
-        w = weight.view(filters, -1)
-        w_l2_norm = torch.sqrt((w ** 2).sum(dim=1))
-        threshold = torch.topk(w_l2_norm.view(-1), num_prune, largest=False)[0].max()
-        mask_weight = torch.gt(w_l2_norm, threshold)[:, None, None, None].expand_as(weight).type_as(weight)
-        mask_bias = torch.gt(w_l2_norm, threshold).type_as(weight)
-
-        return {'weight': mask_weight.detach(), 'bias': mask_bias.detach()}
-
-
-class FPGMPruner(WeightRankFilterPruner):
-    """
-    A filter pruner via geometric median.
-    "Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration",
-    https://arxiv.org/pdf/1811.00250.pdf
-    """
-
-    def __init__(self, model, config_list):
-        """
-        Parameters
-        ----------
-        model : pytorch model
-            the model user wants to compress
-        config_list: list
-            support key for each list item:
-                - sparsity: percentage of convolutional filters to be pruned.
-        """
-        super().__init__(model, config_list)
-
-    def _get_mask(self, base_mask, weight, num_prune):
-        """
-        Calculate the mask of given layer.
-        Filters with the smallest sum of its absolute kernel weights are masked.
-        Parameters
-        ----------
-        base_mask : dict
-            The basic mask with the same shape of weight and bias, all item in the basic mask is 1.
-        weight : torch.Tensor
-            Layer's weight
-        num_prune : int
-            Num of filters to prune
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-        min_gm_idx = self._get_min_gm_kernel_idx(weight, num_prune)
-        for idx in min_gm_idx:
-            base_mask['weight'][idx] = 0.
-            if base_mask['bias'] is not None:
-                base_mask['bias'][idx] = 0.
-        return base_mask
-
-    def _get_min_gm_kernel_idx(self, weight, n):
-        assert len(weight.size()) in [3, 4]
-
-        dist_list = []
-        for out_i in range(weight.size(0)):
-            dist_sum = self._get_distance_sum(weight, out_i)
-            dist_list.append((dist_sum, out_i))
-        min_gm_kernels = sorted(dist_list, key=lambda x: x[0])[:n]
-        return [x[1] for x in min_gm_kernels]
-
-    def _get_distance_sum(self, weight, out_idx):
-        """
-        Calculate the total distance between a specified filter (by out_idex and in_idx) and
-        all other filters.
-        Optimized verision of following naive implementation:
-        def _get_distance_sum(self, weight, in_idx, out_idx):
-            w = weight.view(-1, weight.size(-2), weight.size(-1))
-            dist_sum = 0.
-            for k in w:
-                dist_sum += torch.dist(k, weight[in_idx, out_idx], p=2)
-            return dist_sum
-        Parameters
-        ----------
-        weight: Tensor
-            convolutional filter weight
-        out_idx: int
-            output channel index of specified filter, this method calculates the total distance
-            between this specified filter and all other filters.
-        Returns
-        -------
-        float32
-            The total distance
-        """
-        logger.debug('weight size: %s', weight.size())
-        assert len(weight.size()) in [3, 4], 'unsupported weight shape'
-
-        w = weight.view(weight.size(0), -1)
-        anchor_w = w[out_idx].unsqueeze(0).expand(w.size(0), w.size(1))
-        x = w - anchor_w
-        x = (x * x).sum(-1)
-        x = torch.sqrt(x)
-        return x.sum()
-
-    def update_epoch(self, epoch):
-        self.mask_calculated_ops = set()
-
-
-class ActivationRankFilterPruner(Pruner):
-    """
-    A structured pruning base class that prunes the filters with the smallest
-    importance criterion in convolution layers to achieve a preset level of network sparsity.
-    Hengyuan Hu, Rui Peng, Yu-Wing Tai and Chi-Keung Tang,
-    "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures", ICLR 2016.
-    https://arxiv.org/abs/1607.03250
-    Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila and Jan Kautz,
-    "Pruning Convolutional Neural Networks for Resource Efficient Inference", ICLR 2017.
-    https://arxiv.org/abs/1611.06440
-    """
-
-    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
-        """
-        Parameters
-        ----------
-        model : torch.nn.module
-            Model to be pruned
-        config_list : list
-            support key for each list item:
-                - sparsity: percentage of convolutional filters to be pruned.
-        activation : str
-            Activation function
-        statistics_batch_num : int
-            Num of batches for activation statistics
-        """
-
-        super().__init__(model, config_list)
-        self.mask_calculated_ops = set()
-        self.statistics_batch_num = statistics_batch_num
-        self.collected_activation = {}
-        self.hooks = {}
-        assert activation in ['relu', 'relu6']
-        if activation == 'relu':
-            self.activation = torch.nn.functional.relu
-        elif activation == 'relu6':
-            self.activation = torch.nn.functional.relu6
-        else:
-            self.activation = None
-
-    def compress(self):
-        """
-        Compress the model, register a hook for collecting activations.
-        """
-        modules_to_compress = self.detect_modules_to_compress()
-        for layer, config in modules_to_compress:
-            self._instrument_layer(layer, config)
-            self.collected_activation[layer.name] = []
-
-            def _hook(module_, input_, output, name=layer.name):
-                if len(self.collected_activation[name]) < self.statistics_batch_num:
-                    self.collected_activation[name].append(self.activation(output.detach().cpu()))
-
-            layer.module.register_forward_hook(_hook)
-        return self.bound_model
-
-    def _get_mask(self, base_mask, activations, num_prune):
-        return {'weight': None, 'bias': None}
-
-    def calc_mask(self, layer, config):
-        """
-        Calculate the mask of given layer.
-        Filters with the smallest importance criterion which is calculated from the activation are masked.
-
-        Parameters
-        ----------
-        layer : LayerInfo
-            the layer to instrument the compression operation
-        config : dict
-            layer's pruning config
-
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-
-        weight = layer.module.weight.data
-        op_name = layer.name
-        op_type = layer.type
-        assert 0 <= config.get('sparsity') < 1, "sparsity must in the range [0, 1)"
-        assert op_type in ['Conv2d'], "only support Conv2d"
-        assert op_type in config.get('op_types')
-        if op_name in self.mask_calculated_ops:
-            assert op_name in self.mask_dict
-            return self.mask_dict.get(op_name)
-        mask_weight = torch.ones(weight.size()).type_as(weight).detach()
-        if hasattr(layer.module, 'bias') and layer.module.bias is not None:
-            mask_bias = torch.ones(layer.module.bias.size()).type_as(layer.module.bias).detach()
-        else:
-            mask_bias = None
-        mask = {'weight': mask_weight, 'bias': mask_bias}
-        try:
-            filters = weight.size(0)
-            num_prune = int(filters * config.get('sparsity'))
-            if filters < 2 or num_prune < 1 or len(self.collected_activation[layer.name]) < self.statistics_batch_num:
-                return mask
-            mask = self._get_mask(mask, self.collected_activation[layer.name], num_prune)
-        finally:
-            if len(self.collected_activation[layer.name]) == self.statistics_batch_num:
-                self.mask_dict.update({op_name: mask})
-                self.mask_calculated_ops.add(op_name)
-        return mask
-
-
-class ActivationAPoZRankFilterPruner(ActivationRankFilterPruner):
-    """
-    A structured pruning algorithm that prunes the filters with the
-    smallest APoZ(average percentage of zeros) of output activations.
-    Hengyuan Hu, Rui Peng, Yu-Wing Tai and Chi-Keung Tang,
-    "Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures", ICLR 2016.
-    https://arxiv.org/abs/1607.03250
-    """
-
-    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
-        """
-        Parameters
-        ----------
-        model : torch.nn.module
-            Model to be pruned
-        config_list : list
-            support key for each list item:
-                - sparsity: percentage of convolutional filters to be pruned.
-        activation : str
-            Activation function
-        statistics_batch_num : int
-            Num of batches for activation statistics
-        """
-        super().__init__(model, config_list, activation, statistics_batch_num)
-
-    def _get_mask(self, base_mask, activations, num_prune):
-        """
-        Calculate the mask of given layer.
-        Filters with the smallest APoZ(average percentage of zeros) of output activations are masked.
-
-        Parameters
-        ----------
-        base_mask : dict
-            The basic mask with the same shape of weight, all item in the basic mask is 1.
-        activations : list
-            Layer's output activations
-        num_prune : int
-            Num of filters to prune
-
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-        apoz = self._calc_apoz(activations)
-        prune_indices = torch.argsort(apoz, descending=True)[:num_prune]
-        for idx in prune_indices:
-            base_mask['weight'][idx] = 0.
-            if base_mask['bias'] is not None:
-                base_mask['bias'][idx] = 0.
-        return base_mask
-
-    def _calc_apoz(self, activations):
-        """
-        Calculate APoZ(average percentage of zeros) of activations.
-
-        Parameters
-        ----------
-        activations : list
-            Layer's output activations
-
-        Returns
-        -------
-        torch.Tensor
-            Filter's APoZ(average percentage of zeros) of the activations
-        """
-        activations = torch.cat(activations, 0)
-        _eq_zero = torch.eq(activations, torch.zeros_like(activations))
-        _apoz = torch.sum(_eq_zero, dim=(0, 2, 3)) / torch.numel(_eq_zero[:, 0, :, :])
-        return _apoz
-
-
-class ActivationMeanRankFilterPruner(ActivationRankFilterPruner):
-    """
-    A structured pruning algorithm that prunes the filters with the
-    smallest mean value of output activations.
-    Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila and Jan Kautz,
-    "Pruning Convolutional Neural Networks for Resource Efficient Inference", ICLR 2017.
-    https://arxiv.org/abs/1611.06440
-    """
-
-    def __init__(self, model, config_list, activation='relu', statistics_batch_num=1):
-        """
-        Parameters
-        ----------
-        model : torch.nn.module
-            Model to be pruned
-        config_list : list
-            support key for each list item:
-                - sparsity: percentage of convolutional filters to be pruned.
-        activation : str
-            Activation function
-        statistics_batch_num : int
-            Num of batches for activation statistics
-        """
-        super().__init__(model, config_list, activation, statistics_batch_num)
-
-    def _get_mask(self, base_mask, activations, num_prune):
-        """
-        Calculate the mask of given layer.
-        Filters with the smallest APoZ(average percentage of zeros) of output activations are masked.
-
-        Parameters
-        ----------
-        base_mask : dict
-            The basic mask with the same shape of weight, all item in the basic mask is 1.
-        activations : list
-            Layer's output activations
-        num_prune : int
-            Num of filters to prune
-
-        Returns
-        -------
-        dict
-            dictionary for storing masks
-        """
-        mean_activation = self._cal_mean_activation(activations)
-        prune_indices = torch.argsort(mean_activation)[:num_prune]
-        for idx in prune_indices:
-            base_mask['weight'][idx] = 0.
-            if base_mask['bias'] is not None:
-                base_mask['bias'][idx] = 0.
-        return base_mask
-
-    def _cal_mean_activation(self, activations):
-        """
-        Calculate mean value of activations.
-
-        Parameters
-        ----------
-        activations : list
-            Layer's output activations
-
-        Returns
-        -------
-        torch.Tensor
-            Filter's mean value of the output activations
-        """
-        activations = torch.cat(activations, 0)
-        mean_activation = torch.mean(activations, dim=(0, 2, 3))
-        return mean_activation
diff --git a/src/sdk/pynni/nni/compression/torch/lottery_ticket.py b/src/sdk/pynni/nni/compression/torch/lottery_ticket.py
deleted file mode 100644
index 233d90ced8..0000000000
--- a/src/sdk/pynni/nni/compression/torch/lottery_ticket.py
+++ /dev/null
@@ -1,150 +0,0 @@
-import copy
-import logging
-import torch
-from .compressor import Pruner
-
-_logger = logging.getLogger(__name__)
-
-
-class LotteryTicketPruner(Pruner):
-    """
-    This is a Pytorch implementation of the paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks",
-    following NNI model compression interface.
-
-    1. Randomly initialize a neural network f(x;theta_0) (where theta_0 follows D_{theta}).
-    2. Train the network for j iterations, arriving at parameters theta_j.
-    3. Prune p% of the parameters in theta_j, creating a mask m.
-    4. Reset the remaining parameters to their values in theta_0, creating the winning ticket f(x;m*theta_0).
-    5. Repeat step 2, 3, and 4.
-    """
-
-    def __init__(self, model, config_list, optimizer, lr_scheduler=None, reset_weights=True):
-        """
-        Parameters
-        ----------
-        model : pytorch model
-            The model to be pruned
-        config_list : list
-            Supported keys:
-                - prune_iterations : The number of rounds for the iterative pruning.
-                - sparsity : The final sparsity when the compression is done.
-        optimizer : pytorch optimizer
-            The optimizer for the model
-        lr_scheduler : pytorch lr scheduler
-            The lr scheduler for the model if used
-        reset_weights : bool
-            Whether reset weights and optimizer at the beginning of each round.
-        """
-        super().__init__(model, config_list)
-        self.curr_prune_iteration = None
-        self.prune_iterations = self._validate_config(config_list)
-
-        # save init weights and optimizer
-        self.reset_weights = reset_weights
-        if self.reset_weights:
-            self._model = model
-            self._optimizer = optimizer
-            self._model_state = copy.deepcopy(model.state_dict())
-            self._optimizer_state = copy.deepcopy(optimizer.state_dict())
-            self._lr_scheduler = lr_scheduler
-            if lr_scheduler is not None:
-                self._scheduler_state = copy.deepcopy(lr_scheduler.state_dict())
-
-    def _validate_config(self, config_list):
-        prune_iterations = None
-        for config in config_list:
-            assert 'prune_iterations' in config, 'prune_iterations must exist in your config'
-            assert 'sparsity' in config, 'sparsity must exist in your config'
-            if prune_iterations is not None:
-                assert prune_iterations == config[
-                    'prune_iterations'], 'The values of prune_iterations must be equal in your config'
-            prune_iterations = config['prune_iterations']
-        return prune_iterations
-
-    def _print_masks(self, print_mask=False):
-        torch.set_printoptions(threshold=1000)
-        for op_name in self.mask_dict.keys():
-            mask = self.mask_dict[op_name]
-            print('op name: ', op_name)
-            if print_mask:
-                print('mask: ', mask)
-            # calculate current sparsity
-            mask_num = mask['weight'].sum().item()
-            mask_size = mask['weight'].numel()
-            print('sparsity: ', 1 - mask_num / mask_size)
-        torch.set_printoptions(profile='default')
-
-    def _calc_sparsity(self, sparsity):
-        keep_ratio_once = (1 - sparsity) ** (1 / self.prune_iterations)
-        curr_keep_ratio = keep_ratio_once ** self.curr_prune_iteration
-        return max(1 - curr_keep_ratio, 0)
-
-    def _calc_mask(self, weight, sparsity, op_name):
-        if self.curr_prune_iteration == 0:
-            mask = torch.ones(weight.shape).type_as(weight)
-        else:
-            curr_sparsity = self._calc_sparsity(sparsity)
-            assert self.mask_dict.get(op_name) is not None
-            curr_mask = self.mask_dict.get(op_name)
-            w_abs = weight.abs() * curr_mask['weight']
-            k = int(w_abs.numel() * curr_sparsity)
-            threshold = torch.topk(w_abs.view(-1), k, largest=False).values.max()
-            mask = torch.gt(w_abs, threshold).type_as(weight)
-        return {'weight': mask}
-
-    def calc_mask(self, layer, config):
-        """
-        Generate mask for the given ``weight``.
-
-        Parameters
-        ----------
-        layer : LayerInfo
-            The layer to be pruned
-        config : dict
-            Pruning configurations for this weight
-
-        Returns
-        -------
-        tensor
-            The mask for this weight
-        """
-        assert self.mask_dict.get(layer.name) is not None, 'Please call iteration_start before training'
-        mask = self.mask_dict[layer.name]
-        return mask
-
-    def get_prune_iterations(self):
-        """
-        Return the range for iterations.
-        In the first prune iteration, masks are all one, thus, add one more iteration
-
-        Returns
-        -------
-        list
-            A list for pruning iterations
-        """
-        return range(self.prune_iterations + 1)
-
-    def prune_iteration_start(self):
-        """
-        Control the pruning procedure on updated epoch number.
-        Should be called at the beginning of the epoch.
-        """
-        if self.curr_prune_iteration is None:
-            self.curr_prune_iteration = 0
-        else:
-            self.curr_prune_iteration += 1
-        assert self.curr_prune_iteration < self.prune_iterations + 1, 'Exceed the configured prune_iterations'
-
-        modules_to_compress = self.detect_modules_to_compress()
-        for layer, config in modules_to_compress:
-            sparsity = config.get('sparsity')
-            mask = self._calc_mask(layer.module.weight.data, sparsity, layer.name)
-            self.mask_dict.update({layer.name: mask})
-        self._print_masks()
-
-        # reinit weights back to original after new masks are generated
-        if self.reset_weights:
-            self._model.load_state_dict(self._model_state)
-            self._optimizer.load_state_dict(self._optimizer_state)
-            if self._lr_scheduler is not None:
-                self._lr_scheduler.load_state_dict(self._scheduler_state)
diff --git a/src/sdk/pynni/nni/compression/torch/pruners.py b/src/sdk/pynni/nni/compression/torch/pruners.py
new file mode 100644
index 0000000000..82f37a488c
--- /dev/null
+++ b/src/sdk/pynni/nni/compression/torch/pruners.py
@@ -0,0 +1,383 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import copy
+import logging
+import torch
+from .compressor import Pruner
+
+__all__ = ['LevelPruner', 'AGP_Pruner', 'SlimPruner', 'LotteryTicketPruner']
+
+logger = logging.getLogger('torch pruner')
+
+
+class LevelPruner(Pruner):
+    """
+    Prune the smallest-magnitude weights of each layer according to its configured ``sparsity``.
+    """
+
+    def __init__(self, model, config_list):
+        """
+        Parameters
+        ----------
+        model : torch.nn.module
+            Model to be pruned
+        config_list : list
+            List of pruning configs
+        """
+        super().__init__(model, config_list)
+        self.mask_calculated_ops = set()  # names of ops whose mask is already cached
+
+    def calc_mask(self, layer, config):
+        """
+        Calculate the mask of given layer. A computed mask is cached in
+        ``self.mask_dict`` and reused on later calls.
+
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the compression operation
+        config : dict
+            layer's pruning config, must contain ``sparsity``
+
+        Returns
+        -------
+        dict
+            ``{'weight': mask}``; the mask has the same shape and dtype as the weight
+        """
+        weight = layer.module.weight.data
+        op_name = layer.name
+        if op_name in self.mask_calculated_ops:
+            assert op_name in self.mask_dict, "op_name not in the mask_dict"
+            return self.mask_dict[op_name]
+        k = int(weight.numel() * config['sparsity'])
+        if k == 0:
+            # Nothing to prune: return an all-ones mask as a dict, like every other path.
+            return {'weight': torch.ones(weight.shape).type_as(weight)}
+        w_abs = weight.abs()
+        threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
+        mask = {'weight': torch.gt(w_abs, threshold).type_as(weight)}
+        self.mask_dict.update({op_name: mask})
+        self.mask_calculated_ops.add(op_name)
+        return mask
+
+
+class AGP_Pruner(Pruner):
+    """
+    An automated gradual pruning algorithm that prunes the smallest magnitude
+    weights to achieve a preset level of network sparsity.
+    Michael Zhu and Suyog Gupta, "To prune, or not to prune: exploring the
+    efficacy of pruning for model compression", 2017 NIPS Workshop on Machine
+    Learning of Phones and other Consumer Devices,
+    https://arxiv.org/pdf/1710.01878.pdf
+    """
+
+    def __init__(self, model, config_list):
+        """
+        Parameters
+        ----------
+        model : torch.nn.module
+            Model to be pruned
+        config_list : list
+            List of pruning configs
+        """
+        super().__init__(model, config_list)
+        self.now_epoch = 0
+        # op name -> whether its mask may be regenerated before the next update_epoch call
+        self.if_init_list = {}
+
+    def calc_mask(self, layer, config):
+        """
+        Calculate the mask of given layer. A new mask is generated at most once between
+        ``update_epoch`` calls, and only on epochs selected by ``start_epoch`` and ``frequency``.
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the compression operation
+        config : dict
+            layer's pruning config (``start_epoch``, ``end_epoch``, ``frequency``,
+            ``initial_sparsity`` and ``final_sparsity``)
+        Returns
+        -------
+        dict
+            dictionary for storing masks
+        """
+        weight = layer.module.weight.data
+        op_name = layer.name
+        start_epoch = config.get('start_epoch', 0)
+        freq = config.get('frequency', 1)
+        if self.now_epoch >= start_epoch and self.if_init_list.get(op_name, True) \
+                and (self.now_epoch - start_epoch) % freq == 0:
+            mask = self.mask_dict.get(op_name, {'weight': torch.ones(weight.shape).type_as(weight)})
+            target_sparsity = self.compute_target_sparsity(config)
+            k = int(weight.numel() * target_sparsity)
+            if k == 0 or target_sparsity >= 1 or target_sparsity <= 0:
+                return mask
+            # Apply the current mask tensor first so that already-pruned weights stay pruned.
+            w_abs = weight.abs() * mask['weight']
+            threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
+            new_mask = {'weight': torch.gt(w_abs, threshold).type_as(weight)}
+            self.mask_dict.update({op_name: new_mask})
+            self.if_init_list.update({op_name: False})
+        else:
+            new_mask = self.mask_dict.get(op_name, {'weight': torch.ones(weight.shape).type_as(weight)})
+        return new_mask
+
+    def compute_target_sparsity(self, config):
+        """
+        Calculate the sparsity for pruning at the current epoch, interpolating from
+        ``initial_sparsity`` to ``final_sparsity`` with a cubic schedule.
+        Parameters
+        ----------
+        config : dict
+            Layer's pruning config
+        Returns
+        -------
+        float
+            Target sparsity to be pruned
+        """
+        end_epoch = config.get('end_epoch', 1)
+        start_epoch = config.get('start_epoch', 0)
+        freq = config.get('frequency', 1)
+        final_sparsity = config.get('final_sparsity', 0)
+        initial_sparsity = config.get('initial_sparsity', 0)
+        if end_epoch <= start_epoch or initial_sparsity >= final_sparsity:
+            logger.warning('your end epoch <= start epoch or initial_sparsity >= final_sparsity')
+            return final_sparsity
+
+        if end_epoch <= self.now_epoch:
+            return final_sparsity
+
+        span = ((end_epoch - start_epoch - 1) // freq) * freq
+        assert span > 0
+        target_sparsity = (final_sparsity +
+                           (initial_sparsity - final_sparsity) *
+                           (1.0 - ((self.now_epoch - start_epoch) / span)) ** 3)
+        return target_sparsity
+
+    def update_epoch(self, epoch):
+        """
+        Update epoch and allow every known op to regenerate its mask again.
+        Parameters
+        ----------
+        epoch : int
+            current training epoch; values <= 0 are ignored
+        """
+        if epoch > 0:
+            self.now_epoch = epoch
+            for k in self.if_init_list.keys():
+                self.if_init_list[k] = True
+
+
+class SlimPruner(Pruner):
+    """
+    A structured pruning algorithm that prunes channels by pruning the weights of BN layers.
+    Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang
+    "Learning Efficient Convolutional Networks through Network Slimming", 2017 ICCV
+    https://arxiv.org/pdf/1708.06519.pdf
+    """
+
+    def __init__(self, model, config_list):
+        """
+        Parameters
+        ----------
+        model : torch.nn.module
+            Model to be pruned
+        config_list : list
+            support key for each list item:
+                - sparsity: percentage of convolutional filters to be pruned.
+        """
+        super().__init__(model, config_list)
+        self.mask_calculated_ops = set()
+        if len(config_list) > 1:
+            logger.warning('Slim pruner only supports 1 configuration')
+        weight_list = []
+        # The loop variable must not shadow the config used for the global sparsity below.
+        for (layer, _) in self.detect_modules_to_compress():
+            assert layer.type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning'
+            weight_list.append(layer.module.weight.data.abs().clone())
+        all_bn_weights = torch.cat(weight_list)
+        k = int(all_bn_weights.shape[0] * config_list[0]['sparsity'])
+        if k > 0:
+            self.global_threshold = torch.topk(all_bn_weights.view(-1), k, largest=False)[0].max()
+        else:
+            # topk of 0 elements has no max; -1 is below every |weight| so nothing is masked
+            self.global_threshold = -1.
+
+    def calc_mask(self, layer, config):
+        """
+        Calculate the mask of given layer.
+        Scale factors whose absolute value is not above the global threshold are masked.
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the compression operation
+        config : dict
+            layer's pruning config
+        Returns
+        -------
+        dict
+            ``{'weight': mask, 'bias': mask}``, both shaped like the BN weight
+        """
+        weight = layer.module.weight.data
+        assert layer.type == 'BatchNorm2d', 'SlimPruner only supports 2d batch normalization layer pruning'
+        if layer.name in self.mask_calculated_ops:
+            assert layer.name in self.mask_dict
+            return self.mask_dict.get(layer.name)
+        base_mask = torch.ones(weight.size()).type_as(weight).detach()
+        mask = {'weight': base_mask.detach(), 'bias': base_mask.clone().detach()}
+        try:
+            filters = weight.size(0)
+            num_prune = int(filters * config.get('sparsity'))
+            if filters < 2 or num_prune < 1:
+                return mask
+            mask_weight = torch.gt(weight.abs(), self.global_threshold).type_as(weight)
+            mask = {'weight': mask_weight.detach(), 'bias': mask_weight.clone().detach()}
+        finally:
+            # Runs on every exit path, so the all-ones mask is cached too.
+            self.mask_dict.update({layer.name: mask})
+            self.mask_calculated_ops.add(layer.name)
+        return mask
+
+class LotteryTicketPruner(Pruner):
+    """
+    This is a Pytorch implementation of the paper "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks",
+    following NNI model compression interface.
+
+    1. Randomly initialize a neural network f(x;theta_0) (where theta_0 follows D_{theta}).
+    2. Train the network for j iterations, arriving at parameters theta_j.
+    3. Prune p% of the parameters in theta_j, creating a mask m.
+    4. Reset the remaining parameters to their values in theta_0, creating the winning ticket f(x;m*theta_0).
+    5. Repeat step 2, 3, and 4.
+    """
+
+    def __init__(self, model, config_list, optimizer, lr_scheduler=None, reset_weights=True):
+        """
+        Parameters
+        ----------
+        model : pytorch model
+            The model to be pruned
+        config_list : list
+            Supported keys:
+                - prune_iterations : The number of rounds for the iterative pruning.
+                - sparsity : The final sparsity when the compression is done.
+        optimizer : pytorch optimizer
+            The optimizer for the model
+        lr_scheduler : pytorch lr scheduler
+            The lr scheduler for the model if used
+        reset_weights : bool
+            Whether reset weights and optimizer at the beginning of each round.
+        """
+        super().__init__(model, config_list)
+        self.curr_prune_iteration = None
+        self.prune_iterations = self._validate_config(config_list)
+
+        # save init weights and optimizer
+        self.reset_weights = reset_weights
+        if self.reset_weights:
+            self._model = model
+            self._optimizer = optimizer
+            self._model_state = copy.deepcopy(model.state_dict())
+            self._optimizer_state = copy.deepcopy(optimizer.state_dict())
+            self._lr_scheduler = lr_scheduler
+            if lr_scheduler is not None:
+                self._scheduler_state = copy.deepcopy(lr_scheduler.state_dict())
+
+    def _validate_config(self, config_list):
+        """Check the required keys and return the ``prune_iterations`` shared by all configs."""
+        prune_iterations = None
+        for config in config_list:
+            assert 'prune_iterations' in config, 'prune_iterations must exist in your config'
+            assert 'sparsity' in config, 'sparsity must exist in your config'
+            if prune_iterations is not None:
+                assert prune_iterations == config[
+                    'prune_iterations'], 'The values of prune_iterations must be equal in your config'
+            prune_iterations = config['prune_iterations']
+        return prune_iterations
+
+    def _print_masks(self, print_mask=False):
+        """Print the current sparsity (and optionally the mask) of every op in ``self.mask_dict``."""
+        torch.set_printoptions(threshold=1000)
+        for op_name, mask in self.mask_dict.items():
+            print('op name: ', op_name)
+            if print_mask:
+                print('mask: ', mask)
+            # calculate current sparsity
+            mask_num = mask['weight'].sum().item()
+            mask_size = mask['weight'].numel()
+            print('sparsity: ', 1 - mask_num / mask_size)
+        torch.set_printoptions(profile='default')
+
+    def _calc_sparsity(self, sparsity):
+        """Return the cumulative sparsity to reach at the current prune iteration."""
+        keep_ratio_once = (1 - sparsity) ** (1 / self.prune_iterations)
+        curr_keep_ratio = keep_ratio_once ** self.curr_prune_iteration
+        return max(1 - curr_keep_ratio, 0)
+
+    def _calc_mask(self, weight, sparsity, op_name):
+        """Build ``{'weight': mask}`` for one op: all ones in iteration 0, magnitude-pruned afterwards."""
+        if self.curr_prune_iteration == 0:
+            return {'weight': torch.ones(weight.shape).type_as(weight)}
+        curr_mask = self.mask_dict.get(op_name)
+        assert curr_mask is not None
+        w_abs = weight.abs() * curr_mask['weight']
+        k = int(w_abs.numel() * self._calc_sparsity(sparsity))
+        if k == 0:
+            # topk of 0 elements has no max; nothing new to prune, so keep the current mask
+            return {'weight': curr_mask['weight']}
+        threshold = torch.topk(w_abs.view(-1), k, largest=False)[0].max()
+        return {'weight': torch.gt(w_abs, threshold).type_as(weight)}
+
+    def calc_mask(self, layer, config):
+        """
+        Return the mask generated for ``layer`` by the latest ``prune_iteration_start`` call.
+
+        Parameters
+        ----------
+        layer : LayerInfo
+            The layer to be pruned
+        config : dict
+            Pruning configurations for this weight
+
+        Returns
+        -------
+        dict
+            ``{'weight': mask}`` for this layer
+        """
+        assert self.mask_dict.get(layer.name) is not None, 'Please call prune_iteration_start before training'
+        return self.mask_dict[layer.name]
+
+    def get_prune_iterations(self):
+        """
+        Return the range for iterations.
+        In the first prune iteration, masks are all one, thus, add one more iteration
+
+        Returns
+        -------
+        range
+            ``range(prune_iterations + 1)``, one entry per pruning iteration
+        """
+        return range(self.prune_iterations + 1)
+
+    def prune_iteration_start(self):
+        """
+        Advance to the next prune iteration and regenerate the masks of all layers.
+        Should be called at the beginning of each prune iteration (see ``get_prune_iterations``).
+        """
+        if self.curr_prune_iteration is None:
+            self.curr_prune_iteration = 0
+        else:
+            self.curr_prune_iteration += 1
+        assert self.curr_prune_iteration < self.prune_iterations + 1, 'Exceed the configured prune_iterations'
+
+        for layer, config in self.detect_modules_to_compress():
+            mask = self._calc_mask(layer.module.weight.data, config.get('sparsity'), layer.name)
+            self.mask_dict.update({layer.name: mask})
+        self._print_masks()
+
+        # reinit weights back to original after new masks are generated
+        if self.reset_weights:
+            self._model.load_state_dict(self._model_state)
+            self._optimizer.load_state_dict(self._optimizer_state)
+            if self._lr_scheduler is not None:
+                self._lr_scheduler.load_state_dict(self._scheduler_state)
diff --git a/src/sdk/pynni/nni/compression/torch/builtin_quantizers.py b/src/sdk/pynni/nni/compression/torch/quantizers.py
similarity index 100%
rename from src/sdk/pynni/nni/compression/torch/builtin_quantizers.py
rename to src/sdk/pynni/nni/compression/torch/quantizers.py
diff --git a/src/sdk/pynni/nni/compression/torch/weight_rank_filter_pruners.py b/src/sdk/pynni/nni/compression/torch/weight_rank_filter_pruners.py
new file mode 100644
index 0000000000..918ed95ec1
--- /dev/null
+++ b/src/sdk/pynni/nni/compression/torch/weight_rank_filter_pruners.py
@@ -0,0 +1,262 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+import torch
+from .compressor import Pruner
+
+__all__ = ['L1FilterPruner', 'L2FilterPruner', 'FPGMPruner']
+
+logger = logging.getLogger('torch weight rank filter pruners')
+
+class WeightRankFilterPruner(Pruner):
+    """
+    A structured pruning base class that prunes the filters with the smallest
+    importance criterion in convolution layers to achieve a preset level of network sparsity.
+    """
+
+    def __init__(self, model, config_list):
+        """
+        Parameters
+        ----------
+        model : torch.nn.Module
+            Model to be pruned
+        config_list : list
+            supported keys for each list item (``op_types`` must also contain the layer's type):
+                - sparsity: fraction of convolutional filters to be pruned, in [0, 1).
+        """
+
+        super().__init__(model, config_list)
+        self.mask_calculated_ops = set()  # operations whose mask has been calculated
+
+    def get_mask(self, base_mask, weight, num_prune):  # hook: subclasses return {'weight': ..., 'bias': ...}
+        raise NotImplementedError('{} get_mask is not implemented'.format(self.__class__.__name__))
+
+    def calc_mask(self, layer, config):
+        """
+        Calculate the mask of the given layer once and cache it in ``self.mask_dict``.
+        Filters with the smallest importance criterion of the kernel weights are masked.
+        Parameters
+        ----------
+        layer : LayerInfo
+            the layer to instrument the compression operation
+        config : dict
+            layer's pruning config; ``sparsity`` and ``op_types`` are read from it
+        Returns
+        -------
+        dict
+            ``{'weight': tensor, 'bias': tensor or None}`` mask of the layer
+        """
+
+        weight = layer.module.weight.data
+        op_name = layer.name
+        op_type = layer.type
+        assert 0 <= config.get('sparsity') < 1, "sparsity must in the range [0, 1)"
+        assert op_type in ['Conv1d', 'Conv2d'], "only support Conv1d and Conv2d"
+        assert op_type in config.get('op_types')
+        if op_name in self.mask_calculated_ops:  # already computed: reuse the cached mask
+            assert op_name in self.mask_dict
+            return self.mask_dict.get(op_name)
+        mask_weight = torch.ones(weight.size()).type_as(weight).detach()
+        if hasattr(layer.module, 'bias') and layer.module.bias is not None:
+            mask_bias = torch.ones(layer.module.bias.size()).type_as(layer.module.bias).detach()
+        else:
+            mask_bias = None
+        mask = {'weight': mask_weight, 'bias': mask_bias}  # all-ones mask, i.e. nothing pruned
+        try:
+            filters = weight.size(0)
+            num_prune = int(filters * config.get('sparsity'))
+            if filters < 2 or num_prune < 1:  # nothing to prune: keep the all-ones mask
+                return mask
+            mask = self.get_mask(mask, weight, num_prune)
+        finally:  # runs on the early return as well, so that mask is cached too
+            self.mask_dict.update({op_name: mask})
+            self.mask_calculated_ops.add(op_name)
+        return mask
+
+
+class L1FilterPruner(WeightRankFilterPruner):
+    """
+    A structured pruning algorithm that prunes the filters of smallest magnitude
+    weights sum in the convolution layers to achieve a preset level of network sparsity.
+    Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet and Hans Peter Graf,
+    "PRUNING FILTERS FOR EFFICIENT CONVNETS", 2017 ICLR
+    https://arxiv.org/abs/1608.08710
+    """
+
+    def __init__(self, model, config_list):
+        """
+        Parameters
+        ----------
+        model : torch.nn.Module
+            Model to be pruned
+        config_list : list
+            supported keys for each list item:
+                - sparsity: fraction of convolutional filters to be pruned.
+        """
+
+        super().__init__(model, config_list)
+
+    def get_mask(self, base_mask, weight, num_prune):
+        """
+        Calculate the mask of given layer.
+        Filters with the smallest sum of their absolute kernel weights are masked.
+        Parameters
+        ----------
+        base_mask : dict
+            The basic mask with the same shape of weight or bias, all item in the basic mask is 1.
+        weight : torch.Tensor
+            Layer's weight, 3-D (Conv1d) or 4-D (Conv2d), with filters along dim 0
+        num_prune : int
+            Num of filters to prune
+
+        Returns
+        -------
+        dict
+            ``{'weight': ..., 'bias': ...}`` 0/1 masks; a filter is kept only if its L1 norm exceeds the threshold
+        """
+
+        filters = weight.shape[0]
+        w_abs_structured = weight.abs().view(filters, -1).sum(dim=1)
+        threshold = torch.topk(w_abs_structured.view(-1), num_prune, largest=False)[0].max()
+        keep = torch.gt(w_abs_structured, threshold)
+        # view as (filters, 1, ..., 1) so the mask expands over Conv1d (3-D) and Conv2d (4-D) weights alike
+        mask_weight = keep.view((-1,) + (1,) * (weight.dim() - 1)).expand_as(weight).type_as(weight)
+        mask_bias = keep.type_as(weight)
+        return {'weight': mask_weight.detach(), 'bias': mask_bias.detach()}
+
+
+class L2FilterPruner(WeightRankFilterPruner):
+    """
+    A structured pruning algorithm that prunes the filters with the
+    smallest L2 norm of the weights.
+    """
+
+    def __init__(self, model, config_list):
+        """
+        Parameters
+        ----------
+        model : torch.nn.Module
+            Model to be pruned
+        config_list : list
+            supported keys for each list item:
+                - sparsity: fraction of convolutional filters to be pruned.
+        """
+
+        super().__init__(model, config_list)
+
+    def get_mask(self, base_mask, weight, num_prune):
+        """
+        Calculate the mask of given layer.
+        Filters with the smallest L2 norm of their kernel weights are masked.
+        Parameters
+        ----------
+        base_mask : dict
+            The basic mask with the same shape of weight or bias, all item in the basic mask is 1.
+        weight : torch.Tensor
+            Layer's weight, 3-D (Conv1d) or 4-D (Conv2d), with filters along dim 0
+        num_prune : int
+            Num of filters to prune
+        Returns
+        -------
+        dict
+            ``{'weight': ..., 'bias': ...}`` 0/1 masks; a filter is kept only if its L2 norm exceeds the threshold
+        """
+        filters = weight.shape[0]
+        w_l2_norm = torch.sqrt((weight.view(filters, -1) ** 2).sum(dim=1))
+        threshold = torch.topk(w_l2_norm.view(-1), num_prune, largest=False)[0].max()
+        keep = torch.gt(w_l2_norm, threshold)
+        # view as (filters, 1, ..., 1) so the mask expands over Conv1d (3-D) and Conv2d (4-D) weights alike
+        mask_weight = keep.view((-1,) + (1,) * (weight.dim() - 1)).expand_as(weight).type_as(weight)
+        mask_bias = keep.type_as(weight)
+        return {'weight': mask_weight.detach(), 'bias': mask_bias.detach()}
+
+
+class FPGMPruner(WeightRankFilterPruner):
+    """
+    A filter pruner via geometric median.
+    "Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration",
+    https://arxiv.org/pdf/1811.00250.pdf
+    """
+
+    def __init__(self, model, config_list):
+        """
+        Parameters
+        ----------
+        model : pytorch model
+            the model user wants to compress
+        config_list: list
+            supported keys for each list item:
+                - sparsity: fraction of convolutional filters to be pruned.
+        """
+        super().__init__(model, config_list)
+
+    def get_mask(self, base_mask, weight, num_prune):
+        """
+        Calculate the mask of given layer.
+        The ``num_prune`` filters with the smallest total distance to all other filters are masked.
+        Parameters
+        ----------
+        base_mask : dict
+            The basic mask with the same shape of weight and bias, all item in the basic mask is 1.
+        weight : torch.Tensor
+            Layer's weight
+        num_prune : int
+            Num of filters to prune
+        Returns
+        -------
+        dict
+            ``base_mask`` itself, modified in place with the selected filters set to 0
+        """
+        min_gm_idx = self._get_min_gm_kernel_idx(weight, num_prune)
+        for idx in min_gm_idx:
+            base_mask['weight'][idx] = 0.
+            if base_mask['bias'] is not None:  # the layer may have no bias
+                base_mask['bias'][idx] = 0.
+        return base_mask
+
+    def _get_min_gm_kernel_idx(self, weight, n):  # indices of the n filters with the smallest distance sums
+        assert len(weight.size()) in [3, 4]
+
+        dist_list = []
+        for out_i in range(weight.size(0)):
+            dist_sum = self._get_distance_sum(weight, out_i)
+            dist_list.append((dist_sum, out_i))
+        min_gm_kernels = sorted(dist_list, key=lambda x: x[0])[:n]  # ascending by distance sum, keep the first n
+        return [x[1] for x in min_gm_kernels]
+
+    def _get_distance_sum(self, weight, out_idx):
+        """
+        Calculate the total Euclidean distance between a specified filter (by out_idx) and
+        all other filters.
+        Optimized version of following naive implementation:
+        def _get_distance_sum(self, weight, out_idx):
+            w = weight.view(weight.size(0), -1)
+            dist_sum = 0.
+            for k in w:
+                dist_sum += torch.dist(k, w[out_idx], p=2)
+            return dist_sum
+        Parameters
+        ----------
+        weight: Tensor
+            convolutional filter weight
+        out_idx: int
+            output channel index of specified filter, this method calculates the total distance
+            between this specified filter and all other filters.
+        Returns
+        -------
+        torch.Tensor
+            0-dim tensor holding the total distance
+        """
+        logger.debug('weight size: %s', weight.size())
+        assert len(weight.size()) in [3, 4], 'unsupported weight shape'
+
+        w = weight.view(weight.size(0), -1)  # one flattened row per filter
+        anchor_w = w[out_idx].unsqueeze(0).expand(w.size(0), w.size(1))  # the chosen filter repeated for every row
+        x = w - anchor_w
+        x = (x * x).sum(-1)
+        x = torch.sqrt(x)  # per-filter Euclidean distance to the chosen filter
+        return x.sum()
+
+    def update_epoch(self, epoch):
+        self.mask_calculated_ops = set()  # clear the computed-ops set so calc_mask recomputes masks on its next call

From 98754c70eceba7dc0a73b07c6c96963c89b5f8f7 Mon Sep 17 00:00:00 2001
From: Lijiao <35484733+lvybriage@users.noreply.github.com>
Date: Mon, 30 Dec 2019 09:33:58 +0800
Subject: [PATCH 06/23] fix overview page table trialId style (#1875)

---
 src/webui/src/components/Modal/Compare.tsx         | 3 ++-
 src/webui/src/components/overview/SuccessTable.tsx | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/webui/src/components/Modal/Compare.tsx b/src/webui/src/components/Modal/Compare.tsx
index 2bc9a9c1e8..2221e5893e 100644
--- a/src/webui/src/components/Modal/Compare.tsx
+++ b/src/webui/src/components/Modal/Compare.tsx
@@ -91,7 +91,8 @@ class Compare extends React.Component<CompareProps, {}> {
             },
             yAxis: {
                 type: 'value',
-                name: 'Metric'
+                name: 'Metric',
+                scale: true
             },
             series: trialIntermediate
         };
diff --git a/src/webui/src/components/overview/SuccessTable.tsx b/src/webui/src/components/overview/SuccessTable.tsx
index 9019afec82..cb3ab1d33e 100644
--- a/src/webui/src/components/overview/SuccessTable.tsx
+++ b/src/webui/src/components/overview/SuccessTable.tsx
@@ -28,12 +28,11 @@ class SuccessTable extends React.Component<SuccessTableProps, {}> {
             {
                 title: 'Trial No.',
                 dataIndex: 'sequenceId',
-                width: 140,
                 className: 'tableHead'
             }, {
                 title: 'ID',
                 dataIndex: 'id',
-                width: 60,
+                width: 80,
                 className: 'tableHead leftTitle',
                 render: (text: string, record: TableRecord): React.ReactNode => {
                     return (

From 23c56b1f35a0488edbf7e2401424622163cb3f05 Mon Sep 17 00:00:00 2001
From: SparkSnail <shinyang@microsoft.com>
Date: Mon, 30 Dec 2019 13:41:11 +0800
Subject: [PATCH 07/23] quick fix nnictl view command (#1892)

---
 tools/nni_cmd/launcher.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/nni_cmd/launcher.py b/tools/nni_cmd/launcher.py
index 3b52030f2f..54ce4aec77 100644
--- a/tools/nni_cmd/launcher.py
+++ b/tools/nni_cmd/launcher.py
@@ -517,7 +517,7 @@ def manage_stopped_experiment(args, mode):
     experiment_id = None
     #find the latest stopped experiment
     if not args.id:
-        print_error('Please set experiment id! \nYou could use \'nnictl {0} {id}\' to {0} a stopped experiment!\n' \
+        print_error('Please set experiment id! \nYou could use \'nnictl {0} id\' to {0} a stopped experiment!\n' \
         'You could use \'nnictl experiment list --all\' to show all experiments!'.format(mode))
         exit(1)
     else:

From 8d8c9de46b3118f0123db7b11a601190eada6086 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <v-yugzh@microsoft.com>
Date: Mon, 30 Dec 2019 14:13:57 +0800
Subject: [PATCH 08/23] [SPOS] Clarify checkpoint directory in docs (#1891)

---
 examples/nas/spos/README.md         | 6 ++++--
 examples/nas/spos/config_search.yml | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/examples/nas/spos/README.md b/examples/nas/spos/README.md
index ed239f30a1..0bba5968f9 100644
--- a/examples/nas/spos/README.md
+++ b/examples/nas/spos/README.md
@@ -18,7 +18,7 @@ Only GPU version is provided here.
 Need to download the flops lookup table from [here](https://1drv.ms/u/s!Am_mmG2-KsrnajesvSdfsq_cN48?e=aHVppN).
 Put `op_flops_dict.pkl` and `checkpoint-150000.pth.tar` (if you don't want to retrain the supernet) under `data` directory.
 
-Prepare ImageNet in the standard format (follow the script [here](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4)). Link it to `data/imagenet` will be more convenient.
+Prepare ImageNet in the standard format (follow the script [here](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4)). Linking it to `data/imagenet` will be more convenient.
 
 After preparation, it's expected to have the following code structure:
 
@@ -48,7 +48,7 @@ spos
 python supernet.py
 ```
 
-Will export the checkpoint to checkpoints directory, for the next step.
+This will export the checkpoint to the `checkpoints` directory for use in the next step.
 
 NOTE: The data loading used in the official repo is [slightly different from usual](https://github.com/megvii-model/SinglePathOneShot/issues/5), as they use BGR tensor and keep the values between 0 and 255 intentionally to align with their own DL framework. The option `--spos-preprocessing` will simulate the behavior used originally and enable you to use the checkpoints pretrained.
 
@@ -64,6 +64,8 @@ nnictl ss_gen -t "python tester.py"
 
 This will generate a file called `nni_auto_gen_search_space.json`, which is a serialized representation of your search space.
 
+By default, it will use `checkpoint-150000.pth.tar` downloaded previously. In case you want to use the checkpoint trained by yourself from the last step, specify `--checkpoint` in the command in `config_search.yml`.
+
 Then search with evolution tuner.
 
 ```
diff --git a/examples/nas/spos/config_search.yml b/examples/nas/spos/config_search.yml
index fe27faefc8..80770568ac 100644
--- a/examples/nas/spos/config_search.yml
+++ b/examples/nas/spos/config_search.yml
@@ -11,6 +11,6 @@ tuner:
   classFileName: tuner.py
   className: EvolutionWithFlops
 trial:
-  command: python tester.py --imagenet-dir /path/to/your/imagenet --spos-prep
+  command: python tester.py --spos-prep
   codeDir: .
   gpuNum: 1

From 2906315c6384903922ca3cbff7de3aeca62c61a3 Mon Sep 17 00:00:00 2001
From: SparkSnail <shinyang@microsoft.com>
Date: Mon, 30 Dec 2019 15:23:34 +0800
Subject: [PATCH 09/23] quick fix http error (#1896)

---
 .../training_service/pai/paiK8S/paiK8STrainingService.ts        | 2 +-
 .../training_service/pai/paiYarn/paiYarnTrainingService.ts      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts b/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
index 2888a9f425..5c0360df6f 100644
--- a/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
@@ -62,13 +62,13 @@ class PAIK8STrainingService extends PAITrainingService {
             case TrialConfigMetadataKey.PAI_CLUSTER_CONFIG:
                 this.paiJobRestServer = new PAIJobRestServer(component.get(PAIK8STrainingService));
                 this.paiClusterConfig = <PAIClusterConfig>JSON.parse(value);
+                this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
                 if(this.paiClusterConfig.passWord) {
                     // Get PAI authentication token
                     await this.updatePaiToken();
                 } else if(this.paiClusterConfig.token) {
                     this.paiToken = this.paiClusterConfig.token;
                 }
-                this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
                 break;
 
             case TrialConfigMetadataKey.TRIAL_CONFIG:
diff --git a/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts b/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
index 6b6f905b72..b10a7172ad 100644
--- a/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
@@ -99,6 +99,7 @@ class PAIYarnTrainingService extends PAITrainingService {
                     path: '/webhdfs/api/v1',
                     host: this.paiClusterConfig.host
                 });
+                this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
                 if(this.paiClusterConfig.passWord) {
                     // Get PAI authentication token
                     await this.updatePaiToken();
@@ -107,7 +108,6 @@ class PAIYarnTrainingService extends PAITrainingService {
                 } else {
                     throw new Error('pai cluster config format error, please set password or token!');
                 }
-                this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
                 break;
 
             case TrialConfigMetadataKey.TRIAL_CONFIG:

From faca02c3e1490690a52b5181d30aab69af56a370 Mon Sep 17 00:00:00 2001
From: QuanluZhang <Quanlu.Zhang@microsoft.com>
Date: Mon, 30 Dec 2019 16:11:55 +0800
Subject: [PATCH 10/23] update doc for pruning algorithms (#1897)

---
 .../Compressor/ActivationRankFilterPruner.md  | 58 ------------
 docs/en_US/Compressor/Overview.md             |  8 +-
 docs/en_US/Compressor/Pruner.md               | 91 ++++++++++++-------
 ...tRankFilterPruner.md => l1filterpruner.md} | 48 +---------
 4 files changed, 64 insertions(+), 141 deletions(-)
 delete mode 100644 docs/en_US/Compressor/ActivationRankFilterPruner.md
 rename docs/en_US/Compressor/{WeightRankFilterPruner.md => l1filterpruner.md} (51%)

diff --git a/docs/en_US/Compressor/ActivationRankFilterPruner.md b/docs/en_US/Compressor/ActivationRankFilterPruner.md
deleted file mode 100644
index 7c836cb140..0000000000
--- a/docs/en_US/Compressor/ActivationRankFilterPruner.md
+++ /dev/null
@@ -1,58 +0,0 @@
-ActivationRankFilterPruner on NNI Compressor
-===
-
-## 1. Introduction
-
-ActivationRankFilterPruner is a series of pruners which prune filters according to some importance criterion calculated from the filters' output activations.
-
-|             Pruner             |       Importance criterion        |                       Reference paper                        |
-| :----------------------------: | :-------------------------------: | :----------------------------------------------------------: |
-| ActivationAPoZRankFilterPruner | APoZ(average percentage of zeros) | [Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures](https://arxiv.org/abs/1607.03250) |
-| ActivationMeanRankFilterPruner | mean value of output activations  | [Pruning Convolutional Neural Networks for Resource Efficient Inference](https://arxiv.org/abs/1611.06440) |
-
-## 2. Pruners
-
-### ActivationAPoZRankFilterPruner
-
-Hengyuan Hu, Rui Peng, Yu-Wing Tai and Chi-Keung Tang,
-
-"[Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures](https://arxiv.org/abs/1607.03250)", ICLR 2016.
-
-ActivationAPoZRankFilterPruner prunes the filters with the smallest APoZ(average percentage of zeros) of output activations.
-
-The APoZ is defined as:
-
-![](../../img/apoz.png)
-
-### ActivationMeanRankFilterPruner
-
-Pavlo Molchanov, Stephen Tyree, Tero Karras, Timo Aila and Jan Kautz,
-
-"[Pruning Convolutional Neural Networks for Resource Efficient Inference](https://arxiv.org/abs/1611.06440)", ICLR 2017.
-
-ActivationMeanRankFilterPruner prunes the filters with the smallest mean value of output activations
-
-## 3. Usage
-
-PyTorch code
-
-```python
-from nni.compression.torch import ActivationAPoZRankFilterPruner
-config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'], 'op_names': ['conv1', 'conv2'] }]
-pruner = ActivationAPoZRankFilterPruner(model, config_list, statistics_batch_num=1)
-pruner.compress()
-```
-
-#### User configuration for ActivationAPoZRankFilterPruner
-
-- **sparsity:** This is to specify the sparsity operations to be compressed to
-- **op_types:** Only Conv2d is supported in ActivationAPoZRankFilterPruner
-
-## 4. Experiment
-
-TODO. 
-
-
-
-
-
diff --git a/docs/en_US/Compressor/Overview.md b/docs/en_US/Compressor/Overview.md
index b8e2903afb..0cde1cdf75 100644
--- a/docs/en_US/Compressor/Overview.md
+++ b/docs/en_US/Compressor/Overview.md
@@ -21,11 +21,11 @@ Pruning algorithms compress the original network by removing redundant weights o
 | [AGP Pruner](./Pruner.md#agp-pruner) | Automated gradual pruning (To prune, or not to prune: exploring the efficacy of pruning for model compression) [Reference Paper](https://arxiv.org/abs/1710.01878)|
 | [Lottery Ticket Pruner](./Pruner.md#agp-pruner) | The pruning process used by "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks". It prunes a model iteratively. [Reference Paper](https://arxiv.org/abs/1803.03635)|
 | [FPGM Pruner](./Pruner.md#fpgm-pruner) | Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration [Reference Paper](https://arxiv.org/pdf/1811.00250.pdf)|
-| [L1Filter Pruner](./Pruner.md#l1filter-pruner) | Pruning filters with the smallest L1 norm of weights in convolution layers(PRUNING FILTERS FOR EFFICIENT CONVNETS)[Reference Paper](https://arxiv.org/abs/1608.08710) |
+| [L1Filter Pruner](./Pruner.md#l1filter-pruner) | Pruning filters with the smallest L1 norm of weights in convolution layers (Pruning Filters for Efficient Convnets) [Reference Paper](https://arxiv.org/abs/1608.08710) |
 | [L2Filter Pruner](./Pruner.md#l2filter-pruner) | Pruning filters with the smallest L2 norm of weights in convolution layers |
-| [ActivationAPoZRankFilterPruner](./Pruner.md#ActivationAPoZRankFilterPruner) | Pruning filters prunes the filters with the smallest APoZ(average percentage of zeros) of output activations(Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures)[Reference Paper](https://arxiv.org/abs/1607.03250) |
-| [ActivationMeanRankFilterPruner](./Pruner.md#ActivationMeanRankFilterPruner) | Pruning filters prunes the filters with the smallest mean value of output activations(Pruning Convolutional Neural Networks for Resource Efficient Inference)[Reference Paper](https://arxiv.org/abs/1611.06440) |
-| [Slim Pruner](./Pruner.md#slim-pruner) | Pruning channels in convolution layers by pruning scaling factors in BN layers(Learning Efficient Convolutional Networks through Network Slimming)[Reference Paper](https://arxiv.org/abs/1708.06519) |
+| [ActivationAPoZRankFilterPruner](./Pruner.md#ActivationAPoZRankFilterPruner) | Pruning filters based on the metric APoZ (average percentage of zeros) which measures the percentage of zeros in activations of (convolutional) layers. [Reference Paper](https://arxiv.org/abs/1607.03250) |
+| [ActivationMeanRankFilterPruner](./Pruner.md#ActivationMeanRankFilterPruner) | Pruning filters based on the metric that calculates the smallest mean value of output activations |
+| [Slim Pruner](./Pruner.md#slim-pruner) | Pruning channels in convolution layers by pruning scaling factors in BN layers (Learning Efficient Convolutional Networks through Network Slimming) [Reference Paper](https://arxiv.org/abs/1708.06519) |
 
 
 **Quantization**
diff --git a/docs/en_US/Compressor/Pruner.md b/docs/en_US/Compressor/Pruner.md
index a96414edae..4b5fdf16af 100644
--- a/docs/en_US/Compressor/Pruner.md
+++ b/docs/en_US/Compressor/Pruner.md
@@ -1,6 +1,19 @@
 Pruner on NNI Compressor
 ===
 
+Index of supported pruning algorithms
+* [Level Pruner](#level-pruner)
+* [AGP Pruner](#agp-pruner)
+* [Lottery Ticket Hypothesis](#lottery-ticket-hypothesis)
+* [Slim Pruner](#slim-pruner)
+* [Filter Pruners with Weight Rank](#weightrankfilterpruner)
+    * [FPGM Pruner](#fpgm-pruner)
+    * [L1Filter Pruner](#l1filter-pruner)
+    * [L2Filter Pruner](#l2filter-pruner)
+* [Filter Pruners with Activation Rank](#activationrankfilterpruner)
+    * [APoZ Rank Pruner](#activationapozrankfilterpruner)
+    * [Activation Mean Rank Pruner](#activationmeanrankfilterpruner)
+
 ## Level Pruner
 
 This is one basic one-shot pruner: you can set a target sparsity level (expressed as a fraction, 0.6 means we will prune 60%). 
@@ -131,13 +144,43 @@ The above configuration means that there are 5 times of iterative pruning. As th
 * **sparsity:** The final sparsity when the compression is done.
 
 ***
+
+## Slim Pruner
+
+This is a one-shot pruner proposed in ['Learning Efficient Convolutional Networks through Network Slimming'](https://arxiv.org/pdf/1708.06519.pdf) by Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang.
+
+![](../../img/slim_pruner.png)
+
+> Slim Pruner **prunes channels in the convolution layers by masking corresponding scaling factors in the later BN layers**, L1 regularization on the scaling factors should be applied in batch normalization (BN) layers while training, scaling factors of BN layers are **globally ranked** while pruning, so the sparse model can be automatically found given sparsity.
+
+### Usage
+
+PyTorch code
+
+```python
+from nni.compression.torch import SlimPruner
+config_list = [{ 'sparsity': 0.8, 'op_types': ['BatchNorm2d'] }]
+pruner = SlimPruner(model, config_list)
+pruner.compress()
+```
+
+#### User configuration for Slim Pruner
+
+- **sparsity:** This is to specify the sparsity operations to be compressed to
+- **op_types:** Only BatchNorm2d is supported in Slim Pruner
+
+
 ## WeightRankFilterPruner
 WeightRankFilterPruner is a series of pruners which prune the filters with the smallest importance criterion calculated from the weights in convolution layers to achieve a preset level of network sparsity
 
-### 1, FPGM Pruner
+### FPGM Pruner
 
 This is an one-shot pruner, FPGM Pruner is an implementation of paper [Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration](https://arxiv.org/pdf/1811.00250.pdf)
 
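+FPGMPruner prunes the filters that are closest to the geometric median of all filters in a layer, i.e. those with the smallest total distance to the other filters.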
+
+ ![](../../img/fpgm_fig1.png)
+
 >Previous works utilized “smaller-norm-less-important” criterion to prune filters with smaller norm values in a convolutional neural network. In this paper, we analyze this norm-based criterion and point out that its effectiveness depends on two requirements that are not always met: (1) the norm deviation of the filters should be large; (2) the minimum norm of the filters should be small. To solve this problem, we propose a novel filter pruning method, namely Filter Pruning via Geometric Median (FPGM), to compress the model regardless of those two requirements. Unlike previous methods, FPGM compresses CNN models by pruning filters with redundancy, rather than those with “relatively less” importance.
 
 #### Usage
@@ -181,9 +224,9 @@ You can view example for more information
 
 ***
 
-### 2, L1Filter Pruner
+### L1Filter Pruner
 
-This is an one-shot pruner, In ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710), authors Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet and Hans Peter Graf.
+This is a one-shot pruner proposed in ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710) by Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet and Hans Peter Graf. The reproduced experiment results can be found [here](l1filterpruner.md).
 
 ![](../../img/l1filter_pruner.png)
 
@@ -217,9 +260,9 @@ pruner.compress()
 
 ***
 
-### 3, L2Filter Pruner
+### L2Filter Pruner
 
-This is a structured pruning algorithm that prunes the filters with the smallest L2 norm of the weights.
+This is a structured pruning algorithm that prunes the filters with the smallest L2 norm of the weights. It is implemented as a one-shot pruner.
 
 #### Usage
 
@@ -240,9 +283,13 @@ pruner.compress()
 ## ActivationRankFilterPruner
 ActivationRankFilterPruner is a series of pruners which prune the filters with the smallest importance criterion calculated from the output activations of convolution layers to achieve a preset level of network sparsity
 
-### 1, ActivationAPoZRankFilterPruner
+### ActivationAPoZRankFilterPruner
+
+We implemented it as a one-shot pruner. It prunes convolutional layers based on the criterion `APoZ`, which is explained in the paper [Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures](https://arxiv.org/abs/1607.03250). Iterative pruning based on `APoZ` will be supported in a future release.
 
-This is an one-shot pruner, ActivationAPoZRankFilterPruner is an implementation of paper [Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures](https://arxiv.org/abs/1607.03250)
+The APoZ is defined as:
+
+![](../../img/apoz.png)
 
 #### Usage
 
@@ -269,9 +316,9 @@ You can view example for more information
 
 ***
 
-### 2, ActivationMeanRankFilterPruner
+### ActivationMeanRankFilterPruner
 
-This is an one-shot pruner, ActivationMeanRankFilterPruner is an implementation of paper [Pruning Convolutional Neural Networks for Resource Efficient Inference](https://arxiv.org/abs/1611.06440)
+We implemented it as a one-shot pruner. It prunes convolutional layers based on the criterion `mean activation`, which is explained in section 2.2 of the paper [Pruning Convolutional Neural Networks for Resource Efficient Inference](https://arxiv.org/abs/1611.06440). Other pruning criteria mentioned in this paper will be supported in a future release.
 
 #### Usage
 
@@ -296,28 +343,4 @@ You can view example for more information
 - **sparsity:** How much percentage of convolutional filters are to be pruned.
 - **op_types:** Only Conv2d is supported in ActivationMeanRankFilterPruner
 
-***
-
-## Slim Pruner
-
-This is an one-shot pruner, In ['Learning Efficient Convolutional Networks through Network Slimming'](https://arxiv.org/pdf/1708.06519.pdf), authors Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan and Changshui Zhang.
-
-![](../../img/slim_pruner.png)
-
-> Slim Pruner **prunes channels in the convolution layers by masking corresponding scaling factors in the later BN layers**, L1 regularization on the scaling factors should be applied in batch normalization (BN) layers while training, scaling factors of BN layers are **globally ranked** while pruning, so the sparse model can be automatically found given sparsity.
-
-### Usage
-
-PyTorch code
-
-```python
-from nni.compression.torch import SlimPruner
-config_list = [{ 'sparsity': 0.8, 'op_types': ['BatchNorm2d'] }]
-pruner = SlimPruner(model, config_list)
-pruner.compress()
-```
-
-#### User configuration for Slim Pruner
-
-- **sparsity:** This is to specify the sparsity operations to be compressed to
-- **op_types:** Only BatchNorm2d is supported in Slim Pruner
+***
\ No newline at end of file
diff --git a/docs/en_US/Compressor/WeightRankFilterPruner.md b/docs/en_US/Compressor/l1filterpruner.md
similarity index 51%
rename from docs/en_US/Compressor/WeightRankFilterPruner.md
rename to docs/en_US/Compressor/l1filterpruner.md
index ef99dcff03..dc42d6478d 100644
--- a/docs/en_US/Compressor/WeightRankFilterPruner.md
+++ b/docs/en_US/Compressor/l1filterpruner.md
@@ -1,19 +1,7 @@
-WeightRankFilterPruner on NNI Compressor
+L1FilterPruner on NNI
 ===
 
-## 1. Introduction
-
-WeightRankFilterPruner is a series of pruners which prune filters according to some importance criterion calculated from the filters' weight.
-
-|     Pruner     |    Importance criterion     |                       Reference paper                        |
-| :------------: | :-------------------------: | :----------------------------------------------------------: |
-| L1FilterPruner |     L1 norm of weights      | [PRUNING FILTERS FOR EFFICIENT CONVNETS](https://arxiv.org/abs/1608.08710) |
-| L2FilterPruner |     L2 norm of weights      |                                                              |
-|   FPGMPruner   | Geometric Median of weights | [Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration](https://arxiv.org/pdf/1811.00250.pdf) |
-
-## 2. Pruners
-
-### L1FilterPruner
+## Introduction
 
 L1FilterPruner is a general structured pruning algorithm for pruning filters in the convolutional layers.
 
@@ -33,37 +21,7 @@ In ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710),
 > 4. A new kernel matrix is created for both the ![](http://latex.codecogs.com/gif.latex?i)th and ![](http://latex.codecogs.com/gif.latex?i+1)th layers, and the remaining kernel
 >      weights are copied to the new model.
 
-### L2FilterPruner
-
-L2FilterPruner is similar to L1FilterPruner, but only replace the importance criterion from L1 norm to L2 norm
-
-### FPGMPruner
-
-Yang He, Ping Liu, Ziwei Wang, Zhilan Hu, Yi Yang
-
-"[Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration](https://arxiv.org/abs/1811.00250)", CVPR 2019.
-
-FPGMPruner prune filters with the smallest geometric median
-
- ![](../../img/fpgm_fig1.png)
-
-## 3. Usage
-
-PyTorch code
-
-```
-from nni.compression.torch import L1FilterPruner
-config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'], 'op_names': ['conv1', 'conv2'] }]
-pruner = L1FilterPruner(model, config_list)
-pruner.compress()
-```
-
-#### User configuration for L1Filter Pruner
-
-- **sparsity:** This is to specify the sparsity operations to be compressed to
-- **op_types:** Only Conv2d is supported in L1Filter Pruner
-
-## 4. Experiment
+## Experiment
 
 We implemented one of the experiments in ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710) with **L1FilterPruner**, we pruned **VGG-16** for CIFAR-10 to **VGG-16-pruned-A** in the paper, in which $64\%$ parameters are pruned. Our experiments results are as follows:
 

From 01385bb076efaa3ef7253c12fb01e938d5893d41 Mon Sep 17 00:00:00 2001
From: SparkSnail <shinyang@microsoft.com>
Date: Tue, 31 Dec 2019 11:11:15 +0800
Subject: [PATCH 11/23] Fix pai http format error, add protocol (#1898)

---
 .../training_service/pai/paiJobInfoCollector.ts | 16 ++++++++++------
 .../pai/paiK8S/paiK8STrainingService.ts         |  2 +-
 .../training_service/pai/paiTrainingService.ts  | 17 +++++++++++------
 .../pai/paiYarn/paiYarnTrainingService.ts       |  4 +++-
 4 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/src/nni_manager/training_service/pai/paiJobInfoCollector.ts b/src/nni_manager/training_service/pai/paiJobInfoCollector.ts
index 07ec1cda7f..e88a4d6f41 100644
--- a/src/nni_manager/training_service/pai/paiJobInfoCollector.ts
+++ b/src/nni_manager/training_service/pai/paiJobInfoCollector.ts
@@ -25,7 +25,7 @@ export class PAIJobInfoCollector {
         this.finalStatuses = ['SUCCEEDED', 'FAILED', 'USER_CANCELED', 'SYS_CANCELED', 'EARLY_STOPPED'];
     }
 
-    public async retrieveTrialStatus(token? : string, paiBaseClusterConfig?: PAIClusterConfig): Promise<void> {
+    public async retrieveTrialStatus(protocol: string, token? : string, paiBaseClusterConfig?: PAIClusterConfig): Promise<void> {
         if (paiBaseClusterConfig === undefined || token === undefined) {
             return Promise.resolve();
         }
@@ -35,13 +35,13 @@ export class PAIJobInfoCollector {
             if (paiTrialJob === undefined) {
                 throw new NNIError(NNIErrorNames.NOT_FOUND, `trial job id ${trialJobId} not found`);
             }
-            updatePaiTrialJobs.push(this.getSinglePAITrialJobInfo(paiTrialJob, token, paiBaseClusterConfig));
+            updatePaiTrialJobs.push(this.getSinglePAITrialJobInfo(protocol, paiTrialJob, token, paiBaseClusterConfig));
         }
 
         await Promise.all(updatePaiTrialJobs);
     }
 
-    private getSinglePAITrialJobInfo(paiTrialJob: PAITrialJobDetail, paiToken: string, paiClusterConfig: PAIClusterConfig): Promise<void> {
+    private getSinglePAITrialJobInfo(protocol: string, paiTrialJob: PAITrialJobDetail, paiToken: string, paiClusterConfig: PAIClusterConfig): Promise<void> {
         const deferred: Deferred<void> = new Deferred<void>();
         if (!this.statusesNeedToCheck.includes(paiTrialJob.status)) {
             deferred.resolve();
@@ -52,7 +52,7 @@ export class PAIJobInfoCollector {
         // Rest call to get PAI job info and update status
         // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
         const getJobInfoRequest: request.Options = {
-            uri: `${paiClusterConfig.host}/rest-server/api/v1/user/${paiClusterConfig.userName}/jobs/${paiTrialJob.paiJobName}`,
+            uri: `${protocol}://${paiClusterConfig.host}/rest-server/api/v1/user/${paiClusterConfig.userName}/jobs/${paiTrialJob.paiJobName}`,
             method: 'GET',
             json: true,
                headers: {
@@ -81,7 +81,11 @@ export class PAIJobInfoCollector {
                                 paiTrialJob.startTime = response.body.jobStatus.appLaunchedTime;
                             }
                             if (paiTrialJob.url === undefined) {
-                                paiTrialJob.url = response.body.jobStatus.appTrackingUrl;
+                                if (response.body.jobStatus.appTrackingUrl) {
+                                    paiTrialJob.url = response.body.jobStatus.appTrackingUrl;
+                                } else {
+                                    paiTrialJob.url = paiTrialJob.logPath;
+                                }
                             }
                             break;
                         case 'SUCCEEDED':
@@ -114,7 +118,7 @@ export class PAIJobInfoCollector {
                         }
                         // Set pai trial job's url to WebHDFS output path
                         if (paiTrialJob.logPath !== undefined) {
-                            if (paiTrialJob.url) {
+                            if (paiTrialJob.url && paiTrialJob.url !== paiTrialJob.logPath) {
                                 paiTrialJob.url += `,${paiTrialJob.logPath}`;
                             } else {
                                 paiTrialJob.url = `${paiTrialJob.logPath}`;
diff --git a/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts b/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
index 5c0360df6f..fc64d4dbdc 100644
--- a/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiK8S/paiK8STrainingService.ts
@@ -258,7 +258,7 @@ class PAIK8STrainingService extends PAITrainingService {
         // Step 3. Submit PAI job via Rest call
         // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
         const submitJobRequest: request.Options = {
-            uri: `${this.paiClusterConfig.host}/rest-server/api/v2/jobs`,
+            uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v2/jobs`,
             method: 'POST',
             body: paiJobConfig,
             headers: {
diff --git a/src/nni_manager/training_service/pai/paiTrainingService.ts b/src/nni_manager/training_service/pai/paiTrainingService.ts
index 5709d4678d..01cd4ed9dc 100644
--- a/src/nni_manager/training_service/pai/paiTrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiTrainingService.ts
@@ -52,6 +52,7 @@ abstract class PAITrainingService implements TrainingService {
     protected authFileHdfsPath: string | undefined = undefined;
     protected portList?: string | undefined;
     protected paiJobRestServer?: PAIJobRestServer;
+    protected protocol: string = 'http';
 
     constructor() {
         this.log = getLogger();
@@ -165,7 +166,7 @@ abstract class PAITrainingService implements TrainingService {
         }
 
         const stopJobRequest: request.Options = {
-            uri: `${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}\
+            uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}\
 /jobs/${trialJobDetail.paiJobName}/executionType`, 
             method: 'PUT',
             json: true,
@@ -219,10 +220,14 @@ abstract class PAITrainingService implements TrainingService {
     protected formatPAIHost(host: string): string {
         // If users' host start with 'http://' or 'https://', use the original host,
         // or format to 'http//${host}'
-        if (host.startsWith('http://') || host.startsWith('https://')) {
-            return host;
+        if (host.startsWith('http://')) {
+            this.protocol = 'http';
+            return host.replace('http://', '');
+        } else if (host.startsWith('https://')) {
+            this.protocol = 'https';
+            return host.replace('https://', '');
         } else {
-            return `http://${host}`;
+            return host;
         }
     }
 
@@ -239,7 +244,7 @@ abstract class PAITrainingService implements TrainingService {
                     }
                 }
             }
-            await this.paiJobCollector.retrieveTrialStatus(this.paiToken, this.paiClusterConfig);
+            await this.paiJobCollector.retrieveTrialStatus(this.protocol, this.paiToken, this.paiClusterConfig);
             if (this.paiJobRestServer === undefined) {
                 throw new Error('paiBaseJobRestServer not implemented!');
             }
@@ -269,7 +274,7 @@ abstract class PAITrainingService implements TrainingService {
         }
 
         const authenticationReq: request.Options = {
-            uri: `${this.paiClusterConfig.host}/rest-server/api/v1/token`,
+            uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v1/token`,
             method: 'POST',
             json: true,
             body: {
diff --git a/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts b/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
index b10a7172ad..08038e5b59 100644
--- a/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiYarn/paiYarnTrainingService.ts
@@ -91,6 +91,7 @@ class PAIYarnTrainingService extends PAITrainingService {
             case TrialConfigMetadataKey.PAI_YARN_CLUSTER_CONFIG:
                 this.paiJobRestServer = new PAIJobRestServer(component.get(PAIYarnTrainingService));
                 this.paiClusterConfig = <PAIClusterConfig>JSON.parse(value);
+                this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
 
                 this.hdfsClient = WebHDFS.createClient({
                     user: this.paiClusterConfig.userName,
@@ -98,6 +99,7 @@ class PAIYarnTrainingService extends PAITrainingService {
                     port: 80,
                     path: '/webhdfs/api/v1',
                     host: this.paiClusterConfig.host
+                    
                 });
                 this.paiClusterConfig.host = this.formatPAIHost(this.paiClusterConfig.host);
                 if(this.paiClusterConfig.passWord) {
@@ -272,7 +274,7 @@ class PAIYarnTrainingService extends PAITrainingService {
         // Step 3. Submit PAI job via Rest call
         // Refer https://github.com/Microsoft/pai/blob/master/docs/rest-server/API.md for more detail about PAI Rest API
         const submitJobRequest: request.Options = {
-            uri: `${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}/jobs`,
+            uri: `${this.protocol}://${this.paiClusterConfig.host}/rest-server/api/v1/user/${this.paiClusterConfig.userName}/jobs`,
             method: 'POST',
             json: true,
             body: paiJobConfig,

From 31f545ee043becc272ea158b1d969d9539529662 Mon Sep 17 00:00:00 2001
From: Yan Ni <yann@microsoft.com>
Date: Tue, 31 Dec 2019 12:13:03 +0800
Subject: [PATCH 12/23] Update KDExample.md: fix missing links in doc (#1894)

---
 docs/en_US/TrialExample/KDExample.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en_US/TrialExample/KDExample.md b/docs/en_US/TrialExample/KDExample.md
index b0ead466d2..51a91f0ea1 100644
--- a/docs/en_US/TrialExample/KDExample.md
+++ b/docs/en_US/TrialExample/KDExample.md
@@ -30,4 +30,4 @@ for batch_idx, (data, target) in enumerate(train_loader):
 * **kd_teacher_model:** The pre-trained teacher model 
 * **kd_T:** Temperature for smoothing teacher model's output
 
-The complete code can be found here
\ No newline at end of file
+The complete code can be found [here](https://github.com/microsoft/nni/tree/v1.3/examples/model_compress/knowledge_distill/)

From c993f767a8a5ce041921c1825ac407d9be847bb9 Mon Sep 17 00:00:00 2001
From: Yuge Zhang <v-yugzh@microsoft.com>
Date: Tue, 31 Dec 2019 13:00:24 +0800
Subject: [PATCH 13/23] Add SPOS docs and improve NAS doc structure (#1907)

* darts mutator docs

* fix docs

* update

* add docs for SPOS

* index SPOS

* restore workers
---
 docs/en_US/NAS/DARTS.md                       |  44 ++++++-
 docs/en_US/NAS/ENAS.md                        |  43 ++++++-
 docs/en_US/NAS/Overview.md                    |  79 ++----------
 docs/en_US/NAS/PDARTS.md                      |  18 +++
 docs/en_US/NAS/SPOS.md                        | 119 ++++++++++++++++++
 docs/en_US/nas.rst                            |   3 +-
 examples/nas/darts/README.md                  |   1 +
 examples/nas/enas/README.md                   |   1 +
 examples/nas/naive/README.md                  |   1 +
 examples/nas/pdarts/README.md                 |   1 +
 examples/nas/spos/README.md                   |  91 +-------------
 .../pynni/nni/nas/pytorch/darts/mutator.py    |  20 +++
 .../pynni/nni/nas/pytorch/darts/trainer.py    |  36 ++++++
 src/sdk/pynni/nni/nas/pytorch/enas/mutator.py |  25 ++++
 src/sdk/pynni/nni/nas/pytorch/enas/trainer.py |  48 +++++++
 .../pynni/nni/nas/pytorch/spos/evolution.py   |   1 +
 src/sdk/pynni/nni/nas/pytorch/spos/mutator.py |   1 +
 src/sdk/pynni/nni/nas/pytorch/spos/trainer.py |  31 +++++
 src/sdk/pynni/nni/nas/pytorch/trainer.py      |   2 +-
 19 files changed, 395 insertions(+), 170 deletions(-)
 create mode 100644 docs/en_US/NAS/PDARTS.md
 create mode 100644 docs/en_US/NAS/SPOS.md
 create mode 100644 examples/nas/darts/README.md
 create mode 100644 examples/nas/enas/README.md
 create mode 100644 examples/nas/naive/README.md
 create mode 100644 examples/nas/pdarts/README.md

diff --git a/docs/en_US/NAS/DARTS.md b/docs/en_US/NAS/DARTS.md
index 2621fc45b7..d742a8ef6f 100644
--- a/docs/en_US/NAS/DARTS.md
+++ b/docs/en_US/NAS/DARTS.md
@@ -1,18 +1,50 @@
-# DARTS on NNI
+# DARTS
 
 ## Introduction
 
-The paper [DARTS: Differentiable Architecture Search](https://arxiv.org/abs/1806.09055) addresses the scalability challenge of architecture search by formulating the task in a differentiable manner. Their method is based on the continuous relaxation of the architecture representation, allowing efficient search of the architecture using gradient descent
+The paper [DARTS: Differentiable Architecture Search](https://arxiv.org/abs/1806.09055) addresses the scalability challenge of architecture search by formulating the task in a differentiable manner. Their method is based on the continuous relaxation of the architecture representation, allowing efficient search of the architecture using gradient descent.
 
-To implement, authors optimize the network weights and architecture weights alternatively in mini-batches. They further explore the possibility that uses second order optimization (unroll) instead of first order, to improve the performance.
+Authors' code optimizes the network weights and architecture weights alternately in mini-batches. They further explore the possibility of using second order optimization (unroll) instead of first order to improve the performance.
 
-Implementation on NNI is based on the [official implementation](https://github.com/quark0/darts) and a [popular 3rd-party repo](https://github.com/khanrc/pt.darts). So far, first and second order optimization and training from scratch on CIFAR10 have been implemented.
+Implementation on NNI is based on the [official implementation](https://github.com/quark0/darts) and a [popular 3rd-party repo](https://github.com/khanrc/pt.darts). DARTS on NNI is designed to be general for arbitrary search spaces. A CNN search space tailored for CIFAR10, the same as in the original paper, is implemented as a use case of DARTS.
 
-## Reproduce Results
+## Reproduction Results
 
-To reproduce the results in the paper, we do experiments with first and second order optimization. Due to the time limit, we retrain *only the best architecture* derived from the search phase and we repeat the experiment *only once*. Our results is currently on par with the results reported in paper. We will add more results later when ready.
+The above-mentioned example is meant to reproduce the results in the paper. We do experiments with first and second order optimization. Due to the time limit, we retrain *only the best architecture* derived from the search phase and we repeat the experiment *only once*. Our results are currently on par with the results reported in the paper. We will add more results later when ready.
 
 |                        | In paper      | Reproduction |
 | ---------------------- | ------------- | ------------ |
 | First order (CIFAR10)  | 3.00 +/- 0.14 | 2.78         |
 | Second order (CIFAR10) | 2.76 +/- 0.09 | 2.89         |
+
+## Examples
+
+### CNN Search Space
+
+[Example code](https://github.com/microsoft/nni/tree/master/examples/nas/darts)
+
+```bash
+# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+git clone https://github.com/Microsoft/nni.git
+
+# search the best architecture
+cd examples/nas/darts
+python3 search.py
+
+# train the best architecture
+python3 retrain.py --arc-checkpoint ./checkpoints/epoch_49.json
+```
+
+## Reference
+
+### PyTorch
+
+```eval_rst
+..  autoclass:: nni.nas.pytorch.darts.DartsTrainer
+    :members:
+
+    .. automethod:: __init__
+
+..  autoclass:: nni.nas.pytorch.darts.DartsMutator
+    :members:
+```
diff --git a/docs/en_US/NAS/ENAS.md b/docs/en_US/NAS/ENAS.md
index 164bc4397d..ad389f28b9 100644
--- a/docs/en_US/NAS/ENAS.md
+++ b/docs/en_US/NAS/ENAS.md
@@ -1,7 +1,46 @@
-# ENAS on NNI
+# ENAS
 
 ## Introduction
 
 The paper [Efficient Neural Architecture Search via Parameter Sharing](https://arxiv.org/abs/1802.03268) uses parameter sharing between child models to accelerate the NAS process. In ENAS, a controller learns to discover neural network architectures by searching for an optimal subgraph within a large computational graph. The controller is trained with policy gradient to select a subgraph that maximizes the expected reward on the validation set. Meanwhile the model corresponding to the selected subgraph is trained to minimize a canonical cross entropy loss.
 
-Implementation on NNI is based on the [official implementation in Tensorflow](https://github.com/melodyguan/enas), macro and micro search space on CIFAR10 included. Since code to train from scratch on NNI is not ready yet, reproduction results are currently unavailable.
+Implementation on NNI is based on the [official implementation in Tensorflow](https://github.com/melodyguan/enas), including a general-purpose Reinforcement-learning controller and a trainer that trains the target network and this controller alternately. Following the paper, we have also implemented macro and micro search spaces on CIFAR10 to demonstrate how to use these trainers. Since code to train from scratch on NNI is not ready yet, reproduction results are currently unavailable.
+
+## Examples
+
+### CIFAR10 Macro/Micro Search Space
+
+[Example code](https://github.com/microsoft/nni/tree/master/examples/nas/enas)
+
+```bash
+# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+git clone https://github.com/Microsoft/nni.git
+
+# search the best architecture
+cd examples/nas/enas
+
+# search in macro search space
+python3 search.py --search-for macro
+
+# search in micro search space
+python3 search.py --search-for micro
+
+# view more options for search
+python3 search.py -h
+```
+
+## Reference
+
+### PyTorch
+
+```eval_rst
+..  autoclass:: nni.nas.pytorch.enas.EnasTrainer
+    :members:
+
+    .. automethod:: __init__
+
+..  autoclass:: nni.nas.pytorch.enas.EnasMutator
+    :members:
+
+    .. automethod:: __init__
+```
diff --git a/docs/en_US/NAS/Overview.md b/docs/en_US/NAS/Overview.md
index 3426673669..aaf6e599c3 100644
--- a/docs/en_US/NAS/Overview.md
+++ b/docs/en_US/NAS/Overview.md
@@ -6,7 +6,7 @@ However, it takes great efforts to implement NAS algorithms, and it is hard to r
 
 With this motivation, our ambition is to provide a unified architecture in NNI, to accelerate innovations on NAS, and apply state-of-art algorithms on real world problems faster.
 
-With [the unified interface](./NasInterface.md), there are two different modes for the architecture search. [The one](#supported-one-shot-nas-algorithms) is the so-called one-shot NAS, where a super-net is built based on search space, and using one shot training to generate good-performing child model. [The other](./NasInterface.md#classic-distributed-search) is the traditional searching approach, where each child model in search space runs as an independent trial, the performance result is sent to tuner and the tuner generates new child model.
+With [the unified interface](./NasInterface.md), there are two different modes for the architecture search. [One](#supported-one-shot-nas-algorithms) is the so-called one-shot NAS, where a super-net is built based on the search space, and one-shot training is used to generate a good-performing child model. [The other](./NasInterface.md#classic-distributed-search) is the traditional searching approach, where each child model in the search space runs as an independent trial, the performance result is sent to the tuner, and the tuner generates a new child model.
 
 * [Supported One-shot NAS Algorithms](#supported-one-shot-nas-algorithms)
 * [Classic Distributed NAS with NNI experiment](./NasInterface.md#classic-distributed-search)
@@ -14,85 +14,24 @@ With [the unified interface](./NasInterface.md), there are two different modes f
 
 ## Supported One-shot NAS Algorithms
 
-NNI supports below NAS algorithms now and being adding more. User can reproduce an algorithm or use it on owned dataset. we also encourage user to implement other algorithms with [NNI API](#use-nni-api), to benefit more people.
+NNI supports the NAS algorithms below and is adding more. Users can reproduce an algorithm or use it on their own dataset. We also encourage users to implement other algorithms with [NNI API](#use-nni-api), to benefit more people.
 
 |Name|Brief Introduction of Algorithm|
 |---|---|
-| [ENAS](#enas) | Efficient Neural Architecture Search via Parameter Sharing [Reference Paper][1] |
-| [DARTS](#darts) | DARTS: Differentiable Architecture Search [Reference Paper][3] |
-| [P-DARTS](#p-darts) | Progressive Differentiable Architecture Search: Bridging the Depth Gap between Search and Evaluation [Reference Paper](https://arxiv.org/abs/1904.12760)|
+| [ENAS](ENAS.md) | [Efficient Neural Architecture Search via Parameter Sharing](https://arxiv.org/abs/1802.03268). In ENAS, a controller learns to discover neural network architectures by searching for an optimal subgraph within a large computational graph. It uses parameter sharing between child models to achieve fast speed and excellent performance. |
+| [DARTS](DARTS.md) | [DARTS: Differentiable Architecture Search](https://arxiv.org/abs/1806.09055) introduces a novel algorithm for differentiable network architecture search on bilevel optimization. |
+| [P-DARTS](PDARTS.md) | [Progressive Differentiable Architecture Search: Bridging the Depth Gap between Search and Evaluation](https://arxiv.org/abs/1904.12760) is based on DARTS. It introduces an efficient algorithm which allows the depth of searched architectures to grow gradually during the training procedure. |
+| [SPOS](SPOS.md) | [Single Path One-Shot Neural Architecture Search with Uniform Sampling](https://arxiv.org/abs/1904.00420) constructs a simplified supernet trained with a uniform path sampling method, and applies an evolutionary algorithm to efficiently search for the best-performing architectures. |
 
-Note, these algorithms run **standalone without nnictl**, and supports PyTorch only. Tensorflow 2.0 will be supported in future release.
+One-shot algorithms run **standalone without nnictl**. Only the PyTorch version has been implemented. Tensorflow 2.x will be supported in a future release.
 
-### Dependencies
+Here are some common dependencies to run the examples. PyTorch needs to be 1.2 or above to use ``BoolTensor``.
 
 * NNI 1.2+
 * tensorboard
 * PyTorch 1.2+
 * git
 
-### ENAS
-
-[Efficient Neural Architecture Search via Parameter Sharing][1]. In ENAS, a controller learns to discover neural network architectures by searching for an optimal subgraph within a large computational graph. It uses parameter sharing between child models to achieve fast speed and excellent performance.
-
-#### Usage
-
-ENAS in NNI is still under development and we only support search phase for macro/micro search space on CIFAR10. Training from scratch and search space on PTB has not been finished yet. [Detailed Description](ENAS.md)
-
-```bash
-# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
-git clone https://github.com/Microsoft/nni.git
-
-# search the best architecture
-cd examples/nas/enas
-
-# search in macro search space
-python3 search.py --search-for macro
-
-# search in micro search space
-python3 search.py --search-for micro
-
-# view more options for search
-python3 search.py -h
-```
-
-### DARTS
-
-The main contribution of [DARTS: Differentiable Architecture Search][3] on algorithm is to introduce a novel algorithm for differentiable network architecture search on bilevel optimization. [Detailed Description](DARTS.md)
-
-#### Usage
-
-```bash
-# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
-git clone https://github.com/Microsoft/nni.git
-
-# search the best architecture
-cd examples/nas/darts
-python3 search.py
-
-# train the best architecture
-python3 retrain.py --arc-checkpoint ./checkpoints/epoch_49.json
-```
-
-### P-DARTS
-
-[Progressive Differentiable Architecture Search: Bridging the Depth Gap between Search and Evaluation](https://arxiv.org/abs/1904.12760) bases on [DARTS](#DARTS). It's contribution on algorithm is to introduce an efficient algorithm which allows the depth of searched architectures to grow gradually during the training procedure.
-
-#### Usage
-
-```bash
-# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
-git clone https://github.com/Microsoft/nni.git
-
-# search the best architecture
-cd examples/nas/pdarts
-python3 search.py
-
-# train the best architecture, it's the same progress as darts.
-cd ../darts
-python3 retrain.py --arc-checkpoint ../pdarts/checkpoints/epoch_2.json
-```
-
 ## Use NNI API
 
 NOTE, we are trying to support various NAS algorithms with unified programming interface, and it's in very experimental stage. It means the current programing interface may be updated in future.
@@ -104,7 +43,7 @@ The programming interface of designing and searching a model is often demanded i
 1. When designing a neural network, there may be multiple operation choices on a layer, sub-model, or connection, and it's undetermined which one or combination performs  best. So, it needs an easy way to express the candidate layers or sub-models.
 2. When applying NAS on a neural network, it needs an unified way to express the search space of architectures, so that it doesn't need to update trial code for different searching algorithms.
 
-NNI proposed API is [here](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/nas/pytorch). And [here](https://github.com/microsoft/nni/tree/master/examples/nas/darts) is an example of NAS implementation, which bases on NNI proposed interface.
+NNI proposed API is [here](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/nas/pytorch). And [here](https://github.com/microsoft/nni/tree/master/examples/nas/naive) is an example of NAS implementation, which is based on the NNI proposed interface.
 
 [1]: https://arxiv.org/abs/1802.03268
 [2]: https://arxiv.org/abs/1707.07012
diff --git a/docs/en_US/NAS/PDARTS.md b/docs/en_US/NAS/PDARTS.md
new file mode 100644
index 0000000000..e7ebede1c8
--- /dev/null
+++ b/docs/en_US/NAS/PDARTS.md
@@ -0,0 +1,18 @@
+# P-DARTS
+
+## Examples
+
+[Example code](https://github.com/microsoft/nni/tree/master/examples/nas/pdarts)
+
+```bash
+# In case NNI code is not cloned. If the code is cloned already, ignore this line and enter code folder.
+git clone https://github.com/Microsoft/nni.git
+
+# search the best architecture
+cd examples/nas/pdarts
+python3 search.py
+
+# train the best architecture, it's the same process as darts.
+cd ../darts
+python3 retrain.py --arc-checkpoint ../pdarts/checkpoints/epoch_2.json
+```
diff --git a/docs/en_US/NAS/SPOS.md b/docs/en_US/NAS/SPOS.md
new file mode 100644
index 0000000000..189310c1a1
--- /dev/null
+++ b/docs/en_US/NAS/SPOS.md
@@ -0,0 +1,119 @@
+# Single Path One-Shot (SPOS)
+
+## Introduction
+
+Proposed in [Single Path One-Shot Neural Architecture Search with Uniform Sampling](https://arxiv.org/abs/1904.00420), SPOS is a one-shot NAS method that addresses the difficulties in training One-Shot NAS models by constructing a simplified supernet trained with a uniform path sampling method, so that all underlying architectures (and their weights) get trained fully and equally. An evolutionary algorithm is then applied to efficiently search for the best-performing architectures without any fine tuning.
+
+Implementation on NNI is based on the [official repo](https://github.com/megvii-model/SinglePathOneShot). We implement a trainer that trains the supernet and an evolution tuner that leverages the power of the NNI framework to speed up the evolutionary search phase. We have also shown our current reproduction results at the end of this document.
+
+## Examples
+
+Here is a use case, which is the search space in paper, and the way to use flops limit to perform uniform sampling.
+
+[Example code](https://github.com/microsoft/nni/tree/master/examples/nas/spos)
+
+### Requirements
+
+NVIDIA DALI >= 0.16 is needed as we use DALI to accelerate the data loading of ImageNet. [Installation guide](https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/installation.html)
+
+Download the flops lookup table from [here](https://1drv.ms/u/s!Am_mmG2-KsrnajesvSdfsq_cN48?e=aHVppN) (maintained by [Megvii](https://github.com/megvii-model)).
+Put `op_flops_dict.pkl` and `checkpoint-150000.pth.tar` (if you don't want to retrain the supernet) under `data` directory.
+
+Prepare ImageNet in the standard format (follow the script [here](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4)). Linking it to `data/imagenet` will be more convenient.
+
+After preparation, it's expected to have the following code structure:
+
+```
+spos
+├── architecture_final.json
+├── blocks.py
+├── config_search.yml
+├── data
+│   ├── imagenet
+│   │   ├── train
+│   │   └── val
+│   └── op_flops_dict.pkl
+├── dataloader.py
+├── network.py
+├── readme.md
+├── scratch.py
+├── supernet.py
+├── tester.py
+├── tuner.py
+└── utils.py
+```
+
+### Step 1. Train Supernet
+
+```
+python supernet.py
+```
+
+This will export the checkpoint to the `checkpoints` directory, for use in the next step.
+
+NOTE: The data loading used in the official repo is [slightly different from usual](https://github.com/megvii-model/SinglePathOneShot/issues/5), as they use BGR tensor and keep the values between 0 and 255 intentionally to align with their own DL framework. The option `--spos-preprocessing` will simulate the behavior used originally and enable you to use the checkpoints pretrained.
+
+### Step 2. Evolution Search
+
+Single Path One-Shot leverages evolution algorithm to search for the best architecture. The tester, which is responsible for testing the sampled architecture, recalculates all the batch norm for a subset of training images, and evaluates the architecture on the full validation set.
+
+In order to make the tuner aware of the flops limit and have the ability to calculate the flops, we created a new tuner called `EvolutionWithFlops` in `tuner.py`, inheriting the tuner in SDK.
+
+To have a search space ready for NNI framework, first run
+
+```
+nnictl ss_gen -t "python tester.py"
+```
+
+This will generate a file called `nni_auto_gen_search_space.json`, which is a serialized representation of your search space.
+
+By default, it will use `checkpoint-150000.pth.tar` downloaded previously. In case you want to use the checkpoint trained by yourself from the last step, specify `--checkpoint` in the command in `config_search.yml`.
+
+Then search with evolution tuner.
+
+```
+nnictl create --config config_search.yml
+```
+
+The final architecture exported from every epoch of evolution can be found in `checkpoints` under the working directory of your tuner, which, by default, is `$HOME/nni/experiments/your_experiment_id/log`.
+
+### Step 3. Train from Scratch
+
+```
+python scratch.py
+```
+
+By default, it will use `architecture_final.json`. This architecture is provided by the official repo (converted into NNI format). You can use any architecture (e.g., the architecture found in step 2) with `--fixed-arc` option.
+
+## Reference
+
+### PyTorch
+
+```eval_rst
+..  autoclass:: nni.nas.pytorch.spos.SPOSEvolution
+    :members:
+
+    .. automethod:: __init__
+
+..  autoclass:: nni.nas.pytorch.spos.SPOSSupernetTrainer
+    :members:
+
+    .. automethod:: __init__
+
+..  autoclass:: nni.nas.pytorch.spos.SPOSSupernetTrainingMutator
+    :members:
+
+    .. automethod:: __init__
+```
+
+## Known Limitations
+
+* Block search only. Channel search is not supported yet.
+* Only GPU version is provided here.
+
+## Current Reproduction Results
+
+Reproduction is still ongoing. Due to the gap between the official release and the original paper, we compare our current results with the official repo (our run) and the paper.
+
+* Evolution phase is almost aligned with official repo. Our evolution algorithm shows a converging trend and reaches ~65% accuracy at the end of search. Nevertheless, this result is not on par with paper. For details, please refer to [this issue](https://github.com/megvii-model/SinglePathOneShot/issues/6).
+* Retrain phase is not aligned. Our retraining code, which uses the architecture released by the authors, reaches 72.14% accuracy, still having a gap towards 73.61% by official release and 74.3% reported in original paper.
diff --git a/docs/en_US/nas.rst b/docs/en_US/nas.rst
index 2228e52d76..32c235b3bb 100644
--- a/docs/en_US/nas.rst
+++ b/docs/en_US/nas.rst
@@ -22,4 +22,5 @@ For details, please refer to the following tutorials:
     NAS Interface <NAS/NasInterface>
     ENAS <NAS/ENAS>
     DARTS <NAS/DARTS>
-    P-DARTS <NAS/Overview>
+    P-DARTS <NAS/PDARTS>
+    SPOS <NAS/SPOS>
diff --git a/examples/nas/darts/README.md b/examples/nas/darts/README.md
new file mode 100644
index 0000000000..6977be71ef
--- /dev/null
+++ b/examples/nas/darts/README.md
@@ -0,0 +1 @@
+[Documentation](https://nni.readthedocs.io/en/latest/NAS/DARTS.html)
diff --git a/examples/nas/enas/README.md b/examples/nas/enas/README.md
new file mode 100644
index 0000000000..c942ff41ad
--- /dev/null
+++ b/examples/nas/enas/README.md
@@ -0,0 +1 @@
+[Documentation](https://nni.readthedocs.io/en/latest/NAS/ENAS.html)
diff --git a/examples/nas/naive/README.md b/examples/nas/naive/README.md
new file mode 100644
index 0000000000..871d7f0fdd
--- /dev/null
+++ b/examples/nas/naive/README.md
@@ -0,0 +1 @@
+This is a naive example that demonstrates how to use NNI interface to implement a NAS search space.
\ No newline at end of file
diff --git a/examples/nas/pdarts/README.md b/examples/nas/pdarts/README.md
new file mode 100644
index 0000000000..15465360b1
--- /dev/null
+++ b/examples/nas/pdarts/README.md
@@ -0,0 +1 @@
+[Documentation](https://nni.readthedocs.io/en/latest/NAS/PDARTS.html)
diff --git a/examples/nas/spos/README.md b/examples/nas/spos/README.md
index 0bba5968f9..e9d3fafc86 100644
--- a/examples/nas/spos/README.md
+++ b/examples/nas/spos/README.md
@@ -1,90 +1 @@
-# Single Path One-Shot Neural Architecture Search with Uniform Sampling
-
-Single Path One-Shot by Megvii Research. [Paper link](https://arxiv.org/abs/1904.00420). [Official repo](https://github.com/megvii-model/SinglePathOneShot).
-
-Block search only. Channel search is not supported yet.
-
-Only GPU version is provided here.
-
-## Preparation
-
-### Requirements
-
-* PyTorch >= 1.2
-* NVIDIA DALI >= 0.16 as we use DALI to accelerate the data loading of ImageNet. [Installation guide](https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/installation.html)
-
-### Data
-
-Need to download the flops lookup table from [here](https://1drv.ms/u/s!Am_mmG2-KsrnajesvSdfsq_cN48?e=aHVppN).
-Put `op_flops_dict.pkl` and `checkpoint-150000.pth.tar` (if you don't want to retrain the supernet) under `data` directory.
-
-Prepare ImageNet in the standard format (follow the script [here](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4)). Linking it to `data/imagenet` will be more convenient.
-
-After preparation, it's expected to have the following code structure:
-
-```
-spos
-├── architecture_final.json
-├── blocks.py
-├── config_search.yml
-├── data
-│   ├── imagenet
-│   │   ├── train
-│   │   └── val
-│   └── op_flops_dict.pkl
-├── dataloader.py
-├── network.py
-├── readme.md
-├── scratch.py
-├── supernet.py
-├── tester.py
-├── tuner.py
-└── utils.py
-```
-
-## Step 1. Train Supernet
-
-```
-python supernet.py
-```
-
-Will export the checkpoint to `checkpoints` directory, for the next step.
-
-NOTE: The data loading used in the official repo is [slightly different from usual](https://github.com/megvii-model/SinglePathOneShot/issues/5), as they use BGR tensor and keep the values between 0 and 255 intentionally to align with their own DL framework. The option `--spos-preprocessing` will simulate the behavior used originally and enable you to use the checkpoints pretrained.
-
-## Step 2. Evolution Search
-
-Single Path One-Shot leverages evolution algorithm to search for the best architecture. The tester, which is responsible for testing the sampled architecture, recalculates all the batch norm for a subset of training images, and evaluates the architecture on the full validation set.
-
-To have a search space ready for NNI framework, first run
-
-```
-nnictl ss_gen -t "python tester.py"
-```
-
-This will generate a file called `nni_auto_gen_search_space.json`, which is a serialized representation of your search space.
-
-By default, it will use `checkpoint-150000.pth.tar` downloaded previously. In case you want to use the checkpoint trained by yourself from the last step, specify `--checkpoint` in the command in `config_search.yml`.
-
-Then search with evolution tuner.
-
-```
-nnictl create --config config_search.yml
-```
-
-The final architecture exported from every epoch of evolution can be found in `checkpoints` under the working directory of your tuner, which, by default, is `$HOME/nni/experiments/your_experiment_id/log`.
-
-## Step 3. Train from Scratch
-
-```
-python scratch.py
-```
-
-By default, it will use `architecture_final.json`. This architecture is provided by the official repo (converted into NNI format). You can use any architecture (e.g., the architecture found in step 2) with `--fixed-arc` option.
-
-## Current Reproduction Results
-
-Reproduction is still undergoing. Due to the gap between official release and original paper, we compare our current results with official repo (our run) and paper.
-
-* Evolution phase is almost aligned with official repo. Our evolution algorithm shows a converging trend and reaches ~65% accuracy at the end of search. Nevertheless, this result is not on par with paper. For details, please refer to [this issue](https://github.com/megvii-model/SinglePathOneShot/issues/6).
-* Retrain phase is not aligned. Our retraining code, which uses the architecture released by the authors, reaches 72.14% accuracy, still having a gap towards 73.61% by official release and 74.3% reported in original paper.
+[Documentation](https://nni.readthedocs.io/en/latest/NAS/SPOS.html)
diff --git a/src/sdk/pynni/nni/nas/pytorch/darts/mutator.py b/src/sdk/pynni/nni/nas/pytorch/darts/mutator.py
index b257e32351..b3a21f3a31 100644
--- a/src/sdk/pynni/nni/nas/pytorch/darts/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/darts/mutator.py
@@ -14,6 +14,26 @@
 
 
 class DartsMutator(Mutator):
+    """
+    Connects the model in a DARTS (differentiable) way.
+
+    An extra connection is automatically inserted for each LayerChoice. When this connection is selected, there is no
+    op on this LayerChoice (namely a ``ZeroOp``), in which case, every element in the exported choice list is ``false``
+    (not chosen).
+
+    All input choices will be fully connected in the search phase. On exporting, the input choice will choose inputs based
+    on keys in ``choose_from``. If the keys were to be keys of LayerChoices, the top logit of the corresponding LayerChoice
+    will join the competition of input choice to compete against other logits. Otherwise, the logit will be assumed 0.
+
+    It's possible to cut branches by setting parameter ``choices`` in a particular position to ``-inf``. After softmax, the
+    value would be 0. Framework will ignore 0 values and not connect. Note that the gradient on the ``-inf`` location will
+    be 0. Since manipulations with ``-inf`` will be ``nan``, you need to handle the gradient update phase carefully.
+
+    Attributes
+    ----------
+    choices: ParameterDict
+        dict that maps keys of LayerChoices to weighted-connection float tensors.
+    """
     def __init__(self, model):
         super().__init__(model)
         self.choices = nn.ParameterDict()
diff --git a/src/sdk/pynni/nni/nas/pytorch/darts/trainer.py b/src/sdk/pynni/nni/nas/pytorch/darts/trainer.py
index 2032631b2b..9ea2085852 100644
--- a/src/sdk/pynni/nni/nas/pytorch/darts/trainer.py
+++ b/src/sdk/pynni/nni/nas/pytorch/darts/trainer.py
@@ -19,6 +19,42 @@ def __init__(self, model, loss, metrics,
                  optimizer, num_epochs, dataset_train, dataset_valid,
                  mutator=None, batch_size=64, workers=4, device=None, log_frequency=None,
                  callbacks=None, arc_learning_rate=3.0E-4, unrolled=False):
+        """
+        Initialize a DartsTrainer.
+
+        Parameters
+        ----------
+        model : nn.Module
+            PyTorch model to be trained.
+        loss : callable
+            Receives logits and ground truth label, return a loss tensor.
+        metrics : callable
+            Receives logits and ground truth label, return a dict of metrics.
+        optimizer : Optimizer
+            The optimizer used for optimizing the model.
+        num_epochs : int
+            Number of epochs planned for training.
+        dataset_train : Dataset
+            Dataset for training. Will be split for training weights and architecture weights.
+        dataset_valid : Dataset
+            Dataset for testing.
+        mutator : DartsMutator
+            Use in case of customizing your own DartsMutator. By default will instantiate a DartsMutator.
+        batch_size : int
+            Batch size.
+        workers : int
+            Workers for data loading.
+        device : torch.device
+            ``torch.device("cpu")`` or ``torch.device("cuda")``.
+        log_frequency : int
+            Step count per logging.
+        callbacks : list of Callback
+            list of callbacks to trigger at events.
+        arc_learning_rate : float
+            Learning rate of architecture parameters.
+        unrolled : bool
+            ``True`` if using second order optimization, else first order optimization.
+        """
         super().__init__(model, mutator if mutator is not None else DartsMutator(model),
                          loss, metrics, optimizer, num_epochs, dataset_train, dataset_valid,
                          batch_size, workers, device, log_frequency, callbacks)
diff --git a/src/sdk/pynni/nni/nas/pytorch/enas/mutator.py b/src/sdk/pynni/nni/nas/pytorch/enas/mutator.py
index ec079c3e5d..1f2f9bd7ad 100644
--- a/src/sdk/pynni/nni/nas/pytorch/enas/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/enas/mutator.py
@@ -31,6 +31,31 @@ class EnasMutator(Mutator):
 
     def __init__(self, model, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, cell_exit_extra_step=False,
                  skip_target=0.4, branch_bias=0.25):
+        """
+        Initialize an EnasMutator.
+
+        Parameters
+        ----------
+        model : nn.Module
+            PyTorch model.
+        lstm_size : int
+            Controller LSTM hidden units.
+        lstm_num_layers : int
+            Number of layers for stacked LSTM.
+        tanh_constant : float
+            Logits will be equal to ``tanh_constant * tanh(logits)``. Don't use ``tanh`` if this value is ``None``.
+        cell_exit_extra_step : bool
+            If true, RL controller will perform an extra step at the exit of each MutableScope, dump the hidden state
+            and mark it as the hidden state of this MutableScope. This is to align with the original implementation of paper.
+        skip_target : float
+            Target probability that skipconnect will appear.
+        branch_bias : float
+            Manual bias applied to make some operations more likely to be chosen.
+            Currently this is implemented with a hardcoded match rule that aligns with original repo.
+            If a mutable has a ``reduce`` in its key, all its op choices
+            that contain ``conv`` in their typename will receive a bias of ``+self.branch_bias`` initially; while others
+            receive a bias of ``-self.branch_bias``.
+        """
         super().__init__(model)
         self.lstm_size = lstm_size
         self.lstm_num_layers = lstm_num_layers
diff --git a/src/sdk/pynni/nni/nas/pytorch/enas/trainer.py b/src/sdk/pynni/nni/nas/pytorch/enas/trainer.py
index 4c198594ab..6cd5924f39 100644
--- a/src/sdk/pynni/nni/nas/pytorch/enas/trainer.py
+++ b/src/sdk/pynni/nni/nas/pytorch/enas/trainer.py
@@ -18,6 +18,54 @@ def __init__(self, model, loss, metrics, reward_function,
                  mutator=None, batch_size=64, workers=4, device=None, log_frequency=None, callbacks=None,
                  entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999,
                  mutator_lr=0.00035, mutator_steps_aggregate=20, mutator_steps=50, aux_weight=0.4):
+        """
+        Initialize an EnasTrainer.
+
+        Parameters
+        ----------
+        model : nn.Module
+            PyTorch model to be trained.
+        loss : callable
+            Receives logits and ground truth label, return a loss tensor.
+        metrics : callable
+            Receives logits and ground truth label, return a dict of metrics.
+        reward_function : callable
+            Receives logits and ground truth label, return a tensor, which will be fed to RL controller as reward.
+        optimizer : Optimizer
+            The optimizer used for optimizing the model.
+        num_epochs : int
+            Number of epochs planned for training.
+        dataset_train : Dataset
+            Dataset for training. Will be split for training weights and architecture weights.
+        dataset_valid : Dataset
+            Dataset for testing.
+        mutator : EnasMutator
+            Use when customizing your own mutator or a mutator with customized parameters.
+        batch_size : int
+            Batch size.
+        workers : int
+            Workers for data loading.
+        device : torch.device
+            ``torch.device("cpu")`` or ``torch.device("cuda")``.
+        log_frequency : int
+            Step count per logging.
+        callbacks : list of Callback
+            list of callbacks to trigger at events.
+        entropy_weight : float
+            Weight of sample entropy loss.
+        skip_weight : float
+            Weight of skip penalty loss.
+        baseline_decay : float
+            Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``.
+        mutator_lr : float
+            Learning rate for RL controller.
+        mutator_steps_aggregate : int
+            Number of steps that will be aggregated into one mini-batch for RL controller.
+        mutator_steps : int
+            Number of mini-batches for each epoch of RL controller learning.
+        aux_weight : float
+            Weight of auxiliary head loss. ``aux_weight * aux_loss`` will be added to total loss.
+        """
         super().__init__(model, mutator if mutator is not None else EnasMutator(model),
                          loss, metrics, optimizer, num_epochs, dataset_train, dataset_valid,
                          batch_size, workers, device, log_frequency, callbacks)
diff --git a/src/sdk/pynni/nni/nas/pytorch/spos/evolution.py b/src/sdk/pynni/nni/nas/pytorch/spos/evolution.py
index 3541c81fd7..2eb07fac10 100644
--- a/src/sdk/pynni/nni/nas/pytorch/spos/evolution.py
+++ b/src/sdk/pynni/nni/nas/pytorch/spos/evolution.py
@@ -211,6 +211,7 @@ def export_results(self, result):
         Parameters
         ----------
         result : dict
+            Chosen architectures to be exported.
         """
         os.makedirs("checkpoints", exist_ok=True)
         for i, cand in enumerate(result):
diff --git a/src/sdk/pynni/nni/nas/pytorch/spos/mutator.py b/src/sdk/pynni/nni/nas/pytorch/spos/mutator.py
index 88a01eeeaf..838f2fcd05 100644
--- a/src/sdk/pynni/nni/nas/pytorch/spos/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/spos/mutator.py
@@ -17,6 +17,7 @@ def __init__(self, model, flops_func=None, flops_lb=None, flops_ub=None,
         Parameters
         ----------
         model : nn.Module
+            PyTorch model.
         flops_func : callable
             Callable that takes a candidate from `sample_search` and returns its candidate. When `flops_func`
             is None, functions related to flops will be deactivated.
diff --git a/src/sdk/pynni/nni/nas/pytorch/spos/trainer.py b/src/sdk/pynni/nni/nas/pytorch/spos/trainer.py
index ab23760bf9..3b5e69f8cd 100644
--- a/src/sdk/pynni/nni/nas/pytorch/spos/trainer.py
+++ b/src/sdk/pynni/nni/nas/pytorch/spos/trainer.py
@@ -21,6 +21,37 @@ def __init__(self, model, loss, metrics,
                  optimizer, num_epochs, train_loader, valid_loader,
                  mutator=None, batch_size=64, workers=4, device=None, log_frequency=None,
                  callbacks=None):
+        """
+        Parameters
+        ----------
+        model : nn.Module
+            Model with mutables.
+        mutator : Mutator
+            A mutator object that has been initialized with the model.
+        loss : callable
+            Called with logits and targets. Returns a loss tensor.
+        metrics : callable
+            Returns a dict that maps metrics keys to metrics data.
+        optimizer : Optimizer
+            Optimizer that optimizes the model.
+        num_epochs : int
+            Number of epochs of training.
+        train_loader : iterable
+            Data loader of training. Raise ``StopIteration`` when one epoch is exhausted.
+        valid_loader : iterable
+            Data loader of validation. Raise ``StopIteration`` when one epoch is exhausted.
+        batch_size : int
+            Batch size.
+        workers : int
+            Number of threads for data preprocessing. Not used for this trainer. May be removed in the future.
+        device : torch.device
+            Device object. Either ``torch.device("cuda")`` or ``torch.device("cpu")``. When ``None``, trainer will
+            automatically detect GPU and select GPU first.
+        log_frequency : int
+            Number of mini-batches to log metrics.
+        callbacks : list of Callback
+            Callbacks to plug into the trainer. See Callbacks.
+        """
         assert torch.cuda.is_available()
         super().__init__(model, mutator if mutator is not None else SPOSSupernetTrainingMutator(model),
                          loss, metrics, optimizer, num_epochs, None, None,
diff --git a/src/sdk/pynni/nni/nas/pytorch/trainer.py b/src/sdk/pynni/nni/nas/pytorch/trainer.py
index 32ba2e2709..218d6a2d50 100644
--- a/src/sdk/pynni/nni/nas/pytorch/trainer.py
+++ b/src/sdk/pynni/nni/nas/pytorch/trainer.py
@@ -52,7 +52,7 @@ def __init__(self, model, mutator, loss, metrics, optimizer, num_epochs,
         workers : int
             Number of workers used in data preprocessing.
         device : torch.device
-            Device object. Either `torch.device("cuda")` or torch.device("cpu")`. When `None`, trainer will
+            Device object. Either ``torch.device("cuda")`` or ``torch.device("cpu")``. When ``None``, trainer will
             automatic detects GPU and selects GPU first.
         log_frequency : int
             Number of mini-batches to log metrics.

From a6467ad88d1090543b842804d28e7f162b1f1c02 Mon Sep 17 00:00:00 2001
From: Yan Ni <yann@microsoft.com>
Date: Tue, 31 Dec 2019 13:23:57 +0800
Subject: [PATCH 14/23] release note draft for v1.3 (#1895)

---
 docs/en_US/Release.md | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/docs/en_US/Release.md b/docs/en_US/Release.md
index 811a030b1a..02e6604767 100644
--- a/docs/en_US/Release.md
+++ b/docs/en_US/Release.md
@@ -1,5 +1,38 @@
 # ChangeLog
 
+## Release 1.3 - 12/30/2019
+
+### Major Features
+
+#### Neural Architecture Search Algorithms Support
+* [Single Path One Shot](https://github.com/microsoft/nni/tree/v1.3/examples/nas/spos/) algorithm and the example using it
+
+#### Model Compression Algorithms Support
+* [Knowledge Distillation](https://github.com/microsoft/nni/blob/v1.3/docs/en_US/TrialExample/KDExample.md) algorithm and the example using it
+* Pruners
+    * [L2Filter Pruner](https://github.com/microsoft/nni/blob/v1.3/docs/en_US/Compressor/Pruner.md#3-l2filter-pruner)
+    * [ActivationAPoZRankFilterPruner](https://github.com/microsoft/nni/blob/v1.3/docs/en_US/Compressor/Pruner.md#1-activationapozrankfilterpruner)
+    * [ActivationMeanRankFilterPruner](https://github.com/microsoft/nni/blob/v1.3/docs/en_US/Compressor/Pruner.md#2-activationmeanrankfilterpruner)
+* [BNN Quantizer](https://github.com/microsoft/nni/blob/v1.3/docs/en_US/Compressor/Quantizer.md#bnn-quantizer)
+#### Training Service
+* NFS Support for PAI
+    
+    Instead of using HDFS as default storage, since OpenPAI v0.11, OpenPAI can have NFS or AzureBlob or other storage as default storage. In this release, NNI extended the support for this recent change made by OpenPAI, and could integrate with OpenPAI v0.11 or later version with various default storage.
+
+* Kubeflow update adoption
+
+    Adopted Kubeflow 0.7's new support for tf-operator.
+
+### Engineering (code and build automation)
+* Enforced [ESLint](https://eslint.org/) on static code analysis.
+
+### Small changes & Bug Fixes
+* Correctly recognize builtin tuner and customized tuner
+* Logging in dispatcher base
+* Fix the bug where tuner/assessor's failure sometimes kills the experiment.
+* Fix local system as remote machine [issue](https://github.com/microsoft/nni/issues/1852)
+* De-duplicate trial configuration in smac tuner [ticket](https://github.com/microsoft/nni/issues/1364)
+
 ## Release 1.2 - 12/02/2019
 
 ### Major Features

From b8571d32e76f3440b794e44a81f9cdd241f2986c Mon Sep 17 00:00:00 2001
From: Yan Ni <yann@microsoft.com>
Date: Wed, 1 Jan 2020 11:45:01 +0800
Subject: [PATCH 15/23] add dependency for doc build (#1910)

---
 docs/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index ff3b15f98d..35082a1136 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -10,3 +10,4 @@ numpy
 scipy
 coverage
 scikit-learn==0.20
+torch==1.3.1
\ No newline at end of file

From e88383adcb9629baa26f637f057ba60c371da98e Mon Sep 17 00:00:00 2001
From: chicm-ms <38930155+chicm-ms@users.noreply.github.com>
Date: Thu, 2 Jan 2020 11:19:37 +0800
Subject: [PATCH 16/23] Bump handlebars version (#1899)

---
 src/nni_manager/package.json |  2 +-
 src/nni_manager/yarn.lock    | 36 +++++-------------------------------
 2 files changed, 6 insertions(+), 32 deletions(-)

diff --git a/src/nni_manager/package.json b/src/nni_manager/package.json
index 4fb07cf0ac..f79a0a4529 100644
--- a/src/nni_manager/package.json
+++ b/src/nni_manager/package.json
@@ -58,7 +58,7 @@
   },
   "resolutions": {
     "mem": "^4.0.0",
-    "handlebars": "^4.1.0",
+    "handlebars": "^4.5.3",
     "lodash": "^4.17.13",
     "lodash.merge": "^4.6.2",
     "node.extend": "^1.1.7",
diff --git a/src/nni_manager/yarn.lock b/src/nni_manager/yarn.lock
index 09227bdb36..deac0b5c89 100644
--- a/src/nni_manager/yarn.lock
+++ b/src/nni_manager/yarn.lock
@@ -1072,7 +1072,7 @@ debug@^4.0.1, debug@^4.1.0, debug@^4.1.1:
   dependencies:
     ms "^2.1.1"
 
-debuglog@*, debuglog@^1.0.1:
+debuglog@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/debuglog/-/debuglog-1.0.1.tgz#aa24ffb9ac3df9a2351837cfb2d279360cd78492"
 
@@ -1840,9 +1840,9 @@ growl@1.10.5:
   version "1.10.5"
   resolved "https://registry.yarnpkg.com/growl/-/growl-1.10.5.tgz#f2735dc2283674fa67478b10181059355c369e5e"
 
-handlebars@^4.0.11, handlebars@^4.1.0:
-  version "4.1.2"
-  resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.1.2.tgz#b6b37c1ced0306b221e094fc7aca3ec23b131b67"
+handlebars@^4.0.11, handlebars@^4.3.0:
+  version "4.5.3"
+  resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.5.3.tgz#5cf75bd8714f7605713511a56be7c349becb0482"
   dependencies:
     neo-async "^2.6.0"
     optimist "^0.6.1"
@@ -2014,7 +2014,7 @@ import-lazy@^2.1.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/import-lazy/-/import-lazy-2.1.0.tgz#05698e3d45c88e8d7e9d92cb0584e77f096f3e43"
 
-imurmurhash@*, imurmurhash@^0.1.4:
+imurmurhash@^0.1.4:
   version "0.1.4"
   resolved "https://registry.yarnpkg.com/imurmurhash/-/imurmurhash-0.1.4.tgz#9218b9b2b928a238b13dc4fb6b6d576f231453ea"
 
@@ -2595,10 +2595,6 @@ lockfile@^1.0.4:
   dependencies:
     signal-exit "^3.0.2"
 
-lodash._baseindexof@*:
-  version "3.1.0"
-  resolved "https://registry.yarnpkg.com/lodash._baseindexof/-/lodash._baseindexof-3.1.0.tgz#fe52b53a1c6761e42618d654e4a25789ed61822c"
-
 lodash._baseuniq@~4.6.0:
   version "4.6.0"
   resolved "https://registry.yarnpkg.com/lodash._baseuniq/-/lodash._baseuniq-4.6.0.tgz#0ebb44e456814af7905c6212fa2c9b2d51b841e8"
@@ -2606,28 +2602,10 @@ lodash._baseuniq@~4.6.0:
     lodash._createset "~4.0.0"
     lodash._root "~3.0.0"
 
-lodash._bindcallback@*:
-  version "3.0.1"
-  resolved "https://registry.yarnpkg.com/lodash._bindcallback/-/lodash._bindcallback-3.0.1.tgz#e531c27644cf8b57a99e17ed95b35c748789392e"
-
-lodash._cacheindexof@*:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/lodash._cacheindexof/-/lodash._cacheindexof-3.0.2.tgz#3dc69ac82498d2ee5e3ce56091bafd2adc7bde92"
-
-lodash._createcache@*:
-  version "3.1.2"
-  resolved "https://registry.yarnpkg.com/lodash._createcache/-/lodash._createcache-3.1.2.tgz#56d6a064017625e79ebca6b8018e17440bdcf093"
-  dependencies:
-    lodash._getnative "^3.0.0"
-
 lodash._createset@~4.0.0:
   version "4.0.3"
   resolved "https://registry.yarnpkg.com/lodash._createset/-/lodash._createset-4.0.3.tgz#0f4659fbb09d75194fa9e2b88a6644d363c9fe26"
 
-lodash._getnative@*, lodash._getnative@^3.0.0:
-  version "3.9.1"
-  resolved "https://registry.yarnpkg.com/lodash._getnative/-/lodash._getnative-3.9.1.tgz#570bc7dede46d61cdcde687d65d3eecbaa3aaff5"
-
 lodash._root@~3.0.0:
   version "3.0.1"
   resolved "https://registry.yarnpkg.com/lodash._root/-/lodash._root-3.0.1.tgz#fba1c4524c19ee9a5f8136b4609f017cf4ded692"
@@ -2676,10 +2654,6 @@ lodash.pick@^4.4.0:
   version "4.4.0"
   resolved "https://registry.yarnpkg.com/lodash.pick/-/lodash.pick-4.4.0.tgz#52f05610fff9ded422611441ed1fc123a03001b3"
 
-lodash.restparam@*:
-  version "3.6.1"
-  resolved "https://registry.yarnpkg.com/lodash.restparam/-/lodash.restparam-3.6.1.tgz#936a4e309ef330a7645ed4145986c85ae5b20805"
-
 lodash.unescape@4.0.1:
   version "4.0.1"
   resolved "https://registry.yarnpkg.com/lodash.unescape/-/lodash.unescape-4.0.1.tgz#bf2249886ce514cda112fae9218cdc065211fc9c"

From eb39749fa235446ef7960f400bbf4c5de903000f Mon Sep 17 00:00:00 2001
From: Yan Ni <yann@microsoft.com>
Date: Thu, 2 Jan 2020 18:48:02 +0800
Subject: [PATCH 17/23] update README with latest version number (#1913)

---
 README.md          | 8 ++++----
 docs/en_US/conf.py | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 8b885211c3..0944759e5d 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ The tool manages automated machine learning (AutoML) experiments, **dispatches a
 * Researchers and data scientists who want to easily **implement and experiement new AutoML algorithms**, may it be: hyperparameter tuning algorithm, neural architect search algorithm or model compression algorithm.
 * ML Platform owners who want to **support AutoML in their platform**.
 
-### **NNI v1.2 has been released! &nbsp;<a href="#nni-released-reminder"><img width="48" src="docs/img/release_icon.png"></a>**
+### **NNI v1.3 has been released! &nbsp;<a href="#nni-released-reminder"><img width="48" src="docs/img/release_icon.png"></a>**
 
 ## **NNI capabilities in a glance**
 NNI provides CommandLine Tool as well as an user friendly WebUI to manage training experiements. With the extensible API, you can customize your own AutoML algorithms and training services. To make it easy for new users, NNI also provides a set of build-in stat-of-the-art AutoML algorithms and out of box support for popular training platforms. 
@@ -229,7 +229,7 @@ Linux and MacOS
 * Run the following commands in an environment that has `python >= 3.5`, `git` and `wget`.
 
 ```bash
-    git clone -b v1.2 https://github.com/Microsoft/nni.git
+    git clone -b v1.3 https://github.com/Microsoft/nni.git
     cd nni
     source install.sh
 ```
@@ -239,7 +239,7 @@ Windows
 * Run the following commands in an environment that has `python >=3.5`, `git` and `PowerShell`
 
 ```bash
-  git clone -b v1.2 https://github.com/Microsoft/nni.git
+  git clone -b v1.3 https://github.com/Microsoft/nni.git
   cd nni
   powershell -ExecutionPolicy Bypass -file install.ps1
 ```
@@ -255,7 +255,7 @@ The following example is an experiment built on TensorFlow. Make sure you have *
 * Download the examples via clone the source code.
 
 ```bash
-    git clone -b v1.2 https://github.com/Microsoft/nni.git
+    git clone -b v1.3 https://github.com/Microsoft/nni.git
 ```
 
 Linux and MacOS
diff --git a/docs/en_US/conf.py b/docs/en_US/conf.py
index 9db2b8a62d..d7cd21b58c 100644
--- a/docs/en_US/conf.py
+++ b/docs/en_US/conf.py
@@ -22,13 +22,13 @@
 # -- Project information ---------------------------------------------------
 
 project = 'Neural Network Intelligence'
-copyright = '2019, Microsoft'
+copyright = '2020, Microsoft'
 author = 'Microsoft'
 
 # The short X.Y version
 version = ''
 # The full version, including alpha/beta/rc tags
-release = 'v1.2'
+release = 'v1.3'
 
 # -- General configuration ---------------------------------------------------
 

From 70cee7d8858600614e727966a5f6e5f122e69f2b Mon Sep 17 00:00:00 2001
From: Yuge Zhang <v-yugzh@microsoft.com>
Date: Mon, 6 Jan 2020 10:53:39 +0800
Subject: [PATCH 18/23] TextNAS without retrain (#1890)

---
 examples/nas/textnas/README.md                |  45 +++
 examples/nas/textnas/dataloader.py            | 334 ++++++++++++++++++
 examples/nas/textnas/model.py                 | 108 ++++++
 examples/nas/textnas/ops.py                   | 205 +++++++++++
 examples/nas/textnas/search.py                |  89 +++++
 examples/nas/textnas/utils.py                 |  67 ++++
 src/sdk/pynni/nni/nas/pytorch/enas/mutator.py |  21 +-
 src/sdk/pynni/nni/nas/pytorch/enas/trainer.py |  96 +++--
 src/sdk/pynni/nni/nas/pytorch/mutables.py     |   2 +-
 src/sdk/pynni/nni/nas/pytorch/trainer.py      |   4 +-
 src/sdk/pynni/nni/nas/pytorch/utils.py        |  26 +-
 11 files changed, 957 insertions(+), 40 deletions(-)
 create mode 100644 examples/nas/textnas/README.md
 create mode 100644 examples/nas/textnas/dataloader.py
 create mode 100644 examples/nas/textnas/model.py
 create mode 100644 examples/nas/textnas/ops.py
 create mode 100644 examples/nas/textnas/search.py
 create mode 100644 examples/nas/textnas/utils.py

diff --git a/examples/nas/textnas/README.md b/examples/nas/textnas/README.md
new file mode 100644
index 0000000000..fb261ad04d
--- /dev/null
+++ b/examples/nas/textnas/README.md
@@ -0,0 +1,45 @@
+# TextNAS: A Neural Architecture Search Space tailored for Text Representation
+
+TextNAS by MSRA. Official Release.
+
+[Paper link](https://arxiv.org/abs/1912.10729)
+
+## Preparation
+
+Prepare the word vectors and SST dataset, and organize them in data directory as shown below:
+
+```
+textnas
+├── data
+│   ├── sst
+│   │   └── trees
+│   │       ├── dev.txt
+│   │       ├── test.txt
+│   │       └── train.txt
+│   └── glove.840B.300d.txt
+├── dataloader.py
+├── model.py
+├── ops.py
+├── README.md
+├── search.py
+└── utils.py
+```
+
+The following links might be helpful for finding and downloading the corresponding datasets:
+
+* [GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/projects/glove/)
+* [Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank](https://nlp.stanford.edu/sentiment/)
+
+## Search
+
+```
+python search.py
+```
+
+After each search epoch, 10 sampled architectures will be tested directly. Their accuracy is expected to reach 40% - 42% after 10 epochs.
+
+By default, 20 sampled architectures will be exported into `checkpoints` directory for next step.
+
+## Retrain
+
+Not ready.
diff --git a/examples/nas/textnas/dataloader.py b/examples/nas/textnas/dataloader.py
new file mode 100644
index 0000000000..e5a4ed363f
--- /dev/null
+++ b/examples/nas/textnas/dataloader.py
@@ -0,0 +1,334 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+import os
+import pickle
+from collections import Counter
+
+import numpy as np
+import torch
+from torch.utils import data
+
+logger = logging.getLogger("nni.textnas")
+
+
+class PTBTree:
+    WORD_TO_WORD_MAPPING = {
+        "{": "-LCB-",
+        "}": "-RCB-"
+    }
+
+    def __init__(self):
+        self.subtrees = []
+        self.word = None
+        self.label = ""
+        self.parent = None
+        self.span = (-1, -1)
+        self.word_vector = None  # HOS, store dx1 RNN word vector
+        self.prediction = None  # HOS, store Kx1 prediction vector
+
+    def is_leaf(self):
+        return len(self.subtrees) == 0
+
+    def set_by_text(self, text, pos=0, left=0):
+        depth = 0
+        right = left
+        for i in range(pos + 1, len(text)):
+            char = text[i]
+            # update the depth
+            if char == "(":
+                depth += 1
+                if depth == 1:
+                    subtree = PTBTree()
+                    subtree.parent = self
+                    subtree.set_by_text(text, i, right)
+                    right = subtree.span[1]
+                    self.span = (left, right)
+                    self.subtrees.append(subtree)
+            elif char == ")":
+                depth -= 1
+                if len(self.subtrees) == 0:
+                    pos = i
+                    for j in range(i, 0, -1):
+                        if text[j] == " ":
+                            pos = j
+                            break
+                    self.word = text[pos + 1:i]
+                    self.span = (left, left + 1)
+
+            # we've reached the end of the category that is the root of this subtree
+            if depth == 0 and char == " " and self.label == "":
+                self.label = text[pos + 1:i]
+            # we've reached the end of the scope for this bracket
+            if depth < 0:
+                break
+
+        # Fix some issues with variation in output, and one error in the treebank
+        # for a word with a punctuation POS
+        self.standardise_node()
+
+    def standardise_node(self):
+        if self.word in self.WORD_TO_WORD_MAPPING:
+            self.word = self.WORD_TO_WORD_MAPPING[self.word]
+
+    def __repr__(self, single_line=True, depth=0):
+        ans = ""
+        if not single_line and depth > 0:
+            ans = "\n" + depth * "\t"
+        ans += "(" + self.label
+        if self.word is not None:
+            ans += " " + self.word
+        for subtree in self.subtrees:
+            if single_line:
+                ans += " "
+            ans += subtree.__repr__(single_line, depth + 1)
+        ans += ")"
+        return ans
+
+
+def read_tree(source):
+    cur_text = []
+    depth = 0
+    while True:
+        line = source.readline()
+        # Check if we are out of input
+        if line == "":
+            return None
+        # strip whitespace and only use if this contains something
+        line = line.strip()
+        if line == "":
+            continue
+        cur_text.append(line)
+        # Update depth
+        for char in line:
+            if char == "(":
+                depth += 1
+            elif char == ")":
+                depth -= 1
+        # At depth 0 we have a complete tree
+        if depth == 0:
+            tree = PTBTree()
+            tree.set_by_text(" ".join(cur_text))
+            return tree
+    return None
+
+
+def read_trees(source, max_sents=-1):
+    with open(source) as fp:
+        trees = []
+        while True:
+            tree = read_tree(fp)
+            if tree is None:
+                break
+            trees.append(tree)
+            if len(trees) >= max_sents > 0:
+                break
+        return trees
+
+
+class SSTDataset(data.Dataset):
+    def __init__(self, sents, mask, labels):
+        self.sents = sents
+        self.labels = labels
+        self.mask = mask
+
+    def __getitem__(self, index):
+        return (self.sents[index], self.mask[index]), self.labels[index]
+
+    def __len__(self):
+        return len(self.sents)
+
+
+def sst_get_id_input(content, word_id_dict, max_input_length):
+    words = content.split(" ")
+    sentence = [word_id_dict["<pad>"]] * max_input_length
+    mask = [0] * max_input_length
+    unknown = word_id_dict["<unknown>"]
+    for i, word in enumerate(words[:max_input_length]):
+        sentence[i] = word_id_dict.get(word, unknown)
+        mask[i] = 1
+    return sentence, mask
+
+
+def sst_get_phrases(trees, sample_ratio=1.0, is_binary=False, only_sentence=False):
+    all_phrases = []
+    for tree in trees:
+        if only_sentence:
+            sentence = get_sentence_by_tree(tree)
+            label = int(tree.label)
+            pair = (sentence, label)
+            all_phrases.append(pair)
+        else:
+            phrases = get_phrases_by_tree(tree)
+            sentence = get_sentence_by_tree(tree)
+            pair = (sentence, int(tree.label))
+            all_phrases.append(pair)
+            all_phrases += phrases
+    if sample_ratio < 1.:
+        np.random.shuffle(all_phrases)
+    result_phrases = []
+    for pair in all_phrases:
+        if is_binary:
+            phrase, label = pair
+            if label <= 1:
+                pair = (phrase, 0)
+            elif label >= 3:
+                pair = (phrase, 1)
+            else:
+                continue
+        if sample_ratio == 1.:
+            result_phrases.append(pair)
+        else:
+            rand_portion = np.random.random()
+            if rand_portion < sample_ratio:
+                result_phrases.append(pair)
+    return result_phrases
+
+
+def get_phrases_by_tree(tree):
+    phrases = []
+    if tree is None:
+        return phrases
+    if tree.is_leaf():
+        pair = (tree.word, int(tree.label))
+        phrases.append(pair)
+        return phrases
+    left_child_phrases = get_phrases_by_tree(tree.subtrees[0])
+    right_child_phrases = get_phrases_by_tree(tree.subtrees[1])
+    phrases.extend(left_child_phrases)
+    phrases.extend(right_child_phrases)
+    sentence = get_sentence_by_tree(tree)
+    pair = (sentence, int(tree.label))
+    phrases.append(pair)
+    return phrases
+
+
+def get_sentence_by_tree(tree):
+    if tree is None:
+        return ""
+    if tree.is_leaf():
+        return tree.word
+    left_sentence = get_sentence_by_tree(tree.subtrees[0])
+    right_sentence = get_sentence_by_tree(tree.subtrees[1])
+    sentence = left_sentence + " " + right_sentence
+    return sentence.strip()
+
+
+def get_word_id_dict(word_num_dict, word_id_dict, min_count):
+    z = [k for k in sorted(word_num_dict.keys())]
+    for word in z:
+        count = word_num_dict[word]
+        if count >= min_count:
+            index = len(word_id_dict)
+            if word not in word_id_dict:
+                word_id_dict[word] = index
+    return word_id_dict
+
+
+def load_word_num_dict(phrases, word_num_dict):
+    for sentence, _ in phrases:
+        words = sentence.split(" ")
+        for cur_word in words:
+            word = cur_word.strip()
+            word_num_dict[word] += 1
+    return word_num_dict
+
+
+def init_trainable_embedding(embedding_path, word_id_dict, embed_dim=300):
+    word_embed_model = load_glove_model(embedding_path, embed_dim)
+    assert word_embed_model["pool"].shape[1] == embed_dim
+    embedding = np.random.random([len(word_id_dict), embed_dim]).astype(np.float32) / 2.0 - 0.25
+    embedding[0] = np.zeros(embed_dim)  # PAD
+    embedding[1] = (np.random.rand(embed_dim) - 0.5) / 2  # UNK
+    for word, idx in word_id_dict.items():
+        if idx == 0 or idx == 1:
+            continue
+        if word in word_embed_model["mapping"]:
+            embedding[idx] = word_embed_model["pool"][word_embed_model["mapping"][word]]
+        else:
+            embedding[idx] = np.random.rand(embed_dim) / 2.0 - 0.25
+    return embedding
+
+
+def sst_get_trainable_data(phrases, word_id_dict, max_input_length):
+    texts, labels, mask = [], [], []
+
+    for phrase, label in phrases:
+        if not phrase.split():
+            continue
+        phrase_split, mask_split = sst_get_id_input(phrase, word_id_dict, max_input_length)
+        texts.append(phrase_split)
+        labels.append(int(label))
+        mask.append(mask_split)  # field_input is mask
+    labels = np.array(labels, dtype=np.int64)
+    texts = np.reshape(texts, [-1, max_input_length]).astype(np.int32)
+    mask = np.reshape(mask, [-1, max_input_length]).astype(np.int32)
+
+    return SSTDataset(texts, mask, labels)
+
+
+def load_glove_model(filename, embed_dim):  # -> {"mapping": word -> row index, "pool": float32 array stacked from one vector per line}
+    if os.path.exists(filename + ".cache"):  # a previously parsed result is reused as-is (embed_dim is not re-checked here)
+        logger.info("Found cache. Loading...")
+        with open(filename + ".cache", "rb") as fp:
+            return pickle.load(fp)  # NOTE: pickle is only safe for cache files written by this function; never load untrusted ones
+    embedding = {"mapping": dict(), "pool": []}
+    with open(filename) as f:
+        for i, line in enumerate(f):
+            line = line.rstrip("\n")
+            vocab_word, *vec = line.rsplit(" ", maxsplit=embed_dim)  # split from the right: everything left of the last embed_dim fields is the token
+            assert len(vec) == embed_dim, "Unexpected line: '%s'" % line  # check against the requested dimension, not a fixed 300
+            embedding["pool"].append(np.array(list(map(float, vec)), dtype=np.float32))
+            embedding["mapping"][vocab_word] = i
+    embedding["pool"] = np.stack(embedding["pool"])
+    with open(filename + ".cache", "wb") as fp:
+        pickle.dump(embedding, fp)
+    return embedding
+
+
+def read_data_sst(data_path, max_input_length=64, min_count=1, train_with_valid=False,
+                  train_ratio=1., valid_ratio=1., is_binary=False, only_sentence=False):
+    word_id_dict = dict()
+    word_num_dict = Counter()
+
+    sst_path = os.path.join(data_path, "sst")
+    logger.info("Reading SST data...")
+    train_file_name = os.path.join(sst_path, "trees", "train.txt")
+    valid_file_name = os.path.join(sst_path, "trees", "dev.txt")
+    test_file_name = os.path.join(sst_path, "trees", "test.txt")
+    train_trees = read_trees(train_file_name)
+    train_phrases = sst_get_phrases(train_trees, train_ratio, is_binary, only_sentence)
+    logger.info("Finish load train phrases.")
+    valid_trees = read_trees(valid_file_name)
+    valid_phrases = sst_get_phrases(valid_trees, valid_ratio, is_binary, only_sentence)
+    logger.info("Finish load valid phrases.")
+    if train_with_valid:
+        train_phrases += valid_phrases
+    test_trees = read_trees(test_file_name)
+    test_phrases = sst_get_phrases(test_trees, valid_ratio, is_binary, only_sentence=True)
+    logger.info("Finish load test phrases.")
+
+    # get word_id_dict
+    word_id_dict["<pad>"] = 0
+    word_id_dict["<unknown>"] = 1
+    load_word_num_dict(train_phrases, word_num_dict)
+    logger.info("Finish load train words: %d.", len(word_num_dict))
+    load_word_num_dict(valid_phrases, word_num_dict)
+    load_word_num_dict(test_phrases, word_num_dict)
+    logger.info("Finish load valid+test words: %d.", len(word_num_dict))
+    word_id_dict = get_word_id_dict(word_num_dict, word_id_dict, min_count)
+    logger.info("After trim vocab length: %d.", len(word_id_dict))
+
+    logger.info("Loading embedding...")
+    embedding = init_trainable_embedding(os.path.join(data_path, "glove.840B.300d.txt"), word_id_dict)
+    logger.info("Finish initialize word embedding.")
+
+    dataset_train = sst_get_trainable_data(train_phrases, word_id_dict, max_input_length)
+    logger.info("Loaded %d training samples.", len(dataset_train))
+    dataset_valid = sst_get_trainable_data(valid_phrases, word_id_dict, max_input_length)
+    logger.info("Loaded %d validation samples.", len(dataset_valid))
+    dataset_test = sst_get_trainable_data(test_phrases, word_id_dict, max_input_length)
+    logger.info("Loaded %d test samples.", len(dataset_test))
+
+    return dataset_train, dataset_valid, dataset_test, torch.from_numpy(embedding)
diff --git a/examples/nas/textnas/model.py b/examples/nas/textnas/model.py
new file mode 100644
index 0000000000..631c0e134d
--- /dev/null
+++ b/examples/nas/textnas/model.py
@@ -0,0 +1,108 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import numpy as np
+import torch
+import torch.nn as nn
+from nni.nas.pytorch import mutables
+
+from ops import ConvBN, LinearCombine, AvgPool, MaxPool, RNN, Attention, BatchNorm
+from utils import GlobalMaxPool, GlobalAvgPool
+
+
+class Layer(mutables.MutableScope):
+    def __init__(self, key, prev_keys, hidden_units, choose_from_k, cnn_keep_prob, lstm_keep_prob, att_keep_prob, att_mask):
+        super(Layer, self).__init__(key)
+
+        def conv_shortcut(kernel_size):
+            return ConvBN(kernel_size, hidden_units, hidden_units, cnn_keep_prob, False, True)
+
+        self.n_candidates = len(prev_keys)
+        if self.n_candidates:
+            self.prec = mutables.InputChoice(choose_from=prev_keys[-choose_from_k:], n_chosen=1)
+        else:
+            # first layer, skip input choice
+            self.prec = None
+        self.op = mutables.LayerChoice([
+            conv_shortcut(1),
+            conv_shortcut(3),
+            conv_shortcut(5),
+            conv_shortcut(7),
+            AvgPool(3, False, True),
+            MaxPool(3, False, True),
+            RNN(hidden_units, lstm_keep_prob),
+            Attention(hidden_units, 4, att_keep_prob, att_mask)
+        ])
+        if self.n_candidates:
+            self.skipconnect = mutables.InputChoice(choose_from=prev_keys)
+        else:
+            self.skipconnect = None
+        self.bn = BatchNorm(hidden_units, False, True)
+
+    def forward(self, last_layer, prev_layers, mask):
+        # pass an extra last_layer to deal with layer 0 (prev_layers is empty)
+        if self.prec is None:
+            prec = last_layer
+        else:
+            prec = self.prec(prev_layers[-self.prec.n_candidates:])  # skip first
+        out = self.op(prec, mask)
+        if self.skipconnect is not None:
+            connection = self.skipconnect(prev_layers[-self.skipconnect.n_candidates:])
+            if connection is not None:
+                out += connection
+        out = self.bn(out, mask)
+        return out
+
+
+class Model(nn.Module):
+    def __init__(self, embedding, hidden_units=256, num_layers=24, num_classes=5, choose_from_k=5,
+                 lstm_keep_prob=0.5, cnn_keep_prob=0.5, att_keep_prob=0.5, att_mask=True,
+                 embed_keep_prob=0.5, final_output_keep_prob=1.0, global_pool="avg"):
+        super(Model, self).__init__()
+
+        self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)
+        self.hidden_units = hidden_units
+        self.num_layers = num_layers
+        self.num_classes = num_classes
+
+        self.init_conv = ConvBN(1, self.embedding.embedding_dim, hidden_units, cnn_keep_prob, False, True)
+
+        self.layers = nn.ModuleList()
+        candidate_keys_pool = []
+        for layer_id in range(self.num_layers):
+            k = "layer_{}".format(layer_id)
+            self.layers.append(Layer(k, candidate_keys_pool, hidden_units, choose_from_k,
+                                     cnn_keep_prob, lstm_keep_prob, att_keep_prob, att_mask))
+            candidate_keys_pool.append(k)
+
+        self.linear_combine = LinearCombine(self.num_layers)
+        self.linear_out = nn.Linear(self.hidden_units, self.num_classes)
+
+        self.embed_dropout = nn.Dropout(p=1 - embed_keep_prob)
+        self.output_dropout = nn.Dropout(p=1 - final_output_keep_prob)
+
+        assert global_pool in ["max", "avg"]
+        if global_pool == "max":
+            self.global_pool = GlobalMaxPool()
+        elif global_pool == "avg":
+            self.global_pool = GlobalAvgPool()
+
+    def forward(self, inputs):
+        sent_ids, mask = inputs
+        seq = self.embedding(sent_ids.long())
+        seq = self.embed_dropout(seq)
+
+        seq = torch.transpose(seq, 1, 2)  # from (N, L, C) -> (N, C, L)
+
+        x = self.init_conv(seq, mask)
+        prev_layers = []
+
+        for layer in self.layers:
+            x = layer(x, prev_layers, mask)
+            prev_layers.append(x)
+
+        x = self.linear_combine(torch.stack(prev_layers))
+        x = self.global_pool(x, mask)
+        x = self.output_dropout(x)
+        x = self.linear_out(x)
+        return x
diff --git a/examples/nas/textnas/ops.py b/examples/nas/textnas/ops.py
new file mode 100644
index 0000000000..4a890e60ae
--- /dev/null
+++ b/examples/nas/textnas/ops.py
@@ -0,0 +1,205 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from utils import get_length, INF
+
+
+class Mask(nn.Module):
+    def forward(self, seq, mask):
+        # seq: (N, C, L)
+        # mask: (N, L)
+        seq_mask = torch.unsqueeze(mask, 2)
+        seq_mask = torch.transpose(seq_mask.repeat(1, 1, seq.size()[1]), 1, 2)
+        return seq.where(torch.eq(seq_mask, 1), torch.zeros_like(seq))
+
+
+class BatchNorm(nn.Module):
+    def __init__(self, num_features, pre_mask, post_mask, eps=1e-5, decay=0.9, affine=True):
+        super(BatchNorm, self).__init__()
+        self.mask_opt = Mask()
+        self.pre_mask = pre_mask
+        self.post_mask = post_mask
+        self.bn = nn.BatchNorm1d(num_features, eps=eps, momentum=1.0 - decay, affine=affine)
+
+    def forward(self, seq, mask):
+        if self.pre_mask:
+            seq = self.mask_opt(seq, mask)
+        seq = self.bn(seq)
+        if self.post_mask:
+            seq = self.mask_opt(seq, mask)
+        return seq
+
+
+class ConvBN(nn.Module):
+    def __init__(self, kernal_size, in_channels, out_channels, cnn_keep_prob,
+                 pre_mask, post_mask, with_bn=True, with_relu=True):
+        super(ConvBN, self).__init__()
+        self.mask_opt = Mask()
+        self.pre_mask = pre_mask
+        self.post_mask = post_mask
+        self.with_bn = with_bn
+        self.with_relu = with_relu
+        self.conv = nn.Conv1d(in_channels, out_channels, kernal_size, 1, bias=True, padding=(kernal_size - 1) // 2)
+        self.dropout = nn.Dropout(p=(1 - cnn_keep_prob))
+
+        if with_bn:
+            self.bn = BatchNorm(out_channels, not post_mask, True)
+
+        if with_relu:
+            self.relu = nn.ReLU()
+
+    def forward(self, seq, mask):
+        if self.pre_mask:
+            seq = self.mask_opt(seq, mask)
+        seq = self.conv(seq)
+        if self.post_mask:
+            seq = self.mask_opt(seq, mask)
+        if self.with_bn:
+            seq = self.bn(seq, mask)
+        if self.with_relu:
+            seq = self.relu(seq)
+        seq = self.dropout(seq)
+        return seq
+
+
+class AvgPool(nn.Module):
+    def __init__(self, kernal_size, pre_mask, post_mask):
+        super(AvgPool, self).__init__()
+        self.avg_pool = nn.AvgPool1d(kernal_size, 1, padding=(kernal_size - 1) // 2)
+        self.pre_mask = pre_mask
+        self.post_mask = post_mask
+        self.mask_opt = Mask()
+
+    def forward(self, seq, mask):
+        if self.pre_mask:
+            seq = self.mask_opt(seq, mask)
+        seq = self.avg_pool(seq)
+        if self.post_mask:
+            seq = self.mask_opt(seq, mask)
+        return seq
+
+
+class MaxPool(nn.Module):
+    def __init__(self, kernal_size, pre_mask, post_mask):
+        super(MaxPool, self).__init__()
+        self.max_pool = nn.MaxPool1d(kernal_size, 1, padding=(kernal_size - 1) // 2)
+        self.pre_mask = pre_mask
+        self.post_mask = post_mask
+        self.mask_opt = Mask()
+
+    def forward(self, seq, mask):
+        if self.pre_mask:
+            seq = self.mask_opt(seq, mask)
+        seq = self.max_pool(seq)
+        if self.post_mask:
+            seq = self.mask_opt(seq, mask)
+        return seq
+
+
+class Attention(nn.Module):
+    def __init__(self, num_units, num_heads, keep_prob, is_mask):
+        super(Attention, self).__init__()
+        self.num_heads = num_heads
+        self.keep_prob = keep_prob
+
+        self.linear_q = nn.Linear(num_units, num_units)
+        self.linear_k = nn.Linear(num_units, num_units)
+        self.linear_v = nn.Linear(num_units, num_units)
+
+        self.bn = BatchNorm(num_units, True, is_mask)
+        self.dropout = nn.Dropout(p=1 - self.keep_prob)
+
+    def forward(self, seq, mask):
+        in_c = seq.size()[1]
+        seq = torch.transpose(seq, 1, 2)  # (N, L, C)
+        queries = seq
+        keys = seq
+        num_heads = self.num_heads
+
+        # T_q = T_k = L
+        Q = F.relu(self.linear_q(seq))  # (N, T_q, C)
+        K = F.relu(self.linear_k(seq))  # (N, T_k, C)
+        V = F.relu(self.linear_v(seq))  # (N, T_k, C)
+
+        # Split and concat
+        Q_ = torch.cat(torch.split(Q, in_c // num_heads, dim=2), dim=0)  # (h*N, T_q, C/h)
+        K_ = torch.cat(torch.split(K, in_c // num_heads, dim=2), dim=0)  # (h*N, T_k, C/h)
+        V_ = torch.cat(torch.split(V, in_c // num_heads, dim=2), dim=0)  # (h*N, T_k, C/h)
+
+        # Multiplication
+        outputs = torch.matmul(Q_, K_.transpose(1, 2))  # (h*N, T_q, T_k)
+        # Scale
+        outputs = outputs / (K_.size()[-1] ** 0.5)
+        # Key Masking
+        key_masks = mask.repeat(num_heads, 1)  # (h*N, T_k)
+        key_masks = torch.unsqueeze(key_masks, 1)  # (h*N, 1, T_k)
+        key_masks = key_masks.repeat(1, queries.size()[1], 1)  # (h*N, T_q, T_k)
+
+        paddings = torch.ones_like(outputs) * (-INF)  # extremely small value
+        outputs = torch.where(torch.eq(key_masks, 0), paddings, outputs)
+
+        query_masks = mask.repeat(num_heads, 1)  # (h*N, T_q)
+        query_masks = torch.unsqueeze(query_masks, -1)  # (h*N, T_q, 1)
+        query_masks = query_masks.repeat(1, 1, keys.size()[1]).float()  # (h*N, T_q, T_k)
+
+        att_scores = F.softmax(outputs, dim=-1) * query_masks  # (h*N, T_q, T_k)
+        att_scores = self.dropout(att_scores)
+
+        # Weighted sum
+        x_outputs = torch.matmul(att_scores, V_)  # (h*N, T_q, C/h)
+        # Restore shape
+        x_outputs = torch.cat(
+            torch.split(x_outputs, x_outputs.size()[0] // num_heads, dim=0),
+            dim=2)  # (N, T_q, C)
+
+        x = torch.transpose(x_outputs, 1, 2)  # (N, C, L)
+        x = self.bn(x, mask)
+
+        return x
+
+
+class RNN(nn.Module):  # bidirectional GRU over a masked sequence; the two directions are summed
+    def __init__(self, hidden_size, output_keep_prob):
+        super(RNN, self).__init__()
+        self.hidden_size = hidden_size
+        self.bid_rnn = nn.GRU(hidden_size, hidden_size, batch_first=True, bidirectional=True)
+        self.output_keep_prob = output_keep_prob
+
+        self.out_dropout = nn.Dropout(p=(1 - self.output_keep_prob))  # dropout rate is 1 - keep probability
+
+    def forward(self, seq, mask):
+        # seq: (N, C, L)
+        # mask: (N, L)
+        max_len = seq.size()[2]
+        length = get_length(mask).cpu()  # pack_padded_sequence requires tensor lengths on the CPU, even when seq is on GPU
+        seq = torch.transpose(seq, 1, 2)  # to (N, L, C)
+        packed_seq = nn.utils.rnn.pack_padded_sequence(seq, length, batch_first=True,
+                                                       enforce_sorted=False)
+        outputs, _ = self.bid_rnn(packed_seq)
+        outputs = nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True,
+                                                   total_length=max_len)[0]  # pad back to the full input length
+        outputs = outputs.view(-1, max_len, 2, self.hidden_size).sum(2)  # (N, L, C): sum forward and backward outputs
+        outputs = self.out_dropout(outputs)  # output dropout
+        return torch.transpose(outputs, 1, 2)  # back to: (N, C, L)
+
+
+class LinearCombine(nn.Module):
+    def __init__(self, layers_num, trainable=True, input_aware=False, word_level=False):
+        super(LinearCombine, self).__init__()
+        self.input_aware = input_aware
+        self.word_level = word_level
+
+        if input_aware:
+            raise NotImplementedError("Input aware is not supported.")
+        self.w = nn.Parameter(torch.full((layers_num, 1, 1, 1), 1.0 / layers_num),
+                              requires_grad=trainable)
+
+    def forward(self, seq):
+        nw = F.softmax(self.w, dim=0)
+        seq = torch.mul(seq, nw)
+        seq = torch.sum(seq, dim=0)
+        return seq
diff --git a/examples/nas/textnas/search.py b/examples/nas/textnas/search.py
new file mode 100644
index 0000000000..78f6a65696
--- /dev/null
+++ b/examples/nas/textnas/search.py
@@ -0,0 +1,89 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+import os
+import random
+from argparse import ArgumentParser
+from itertools import cycle
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from nni.nas.pytorch.enas import EnasMutator, EnasTrainer
+from nni.nas.pytorch.callbacks import LRSchedulerCallback
+
+from dataloader import read_data_sst
+from model import Model
+from utils import accuracy
+
+
+logger = logging.getLogger("nni.textnas")
+
+
+class TextNASTrainer(EnasTrainer):
+    def __init__(self, *args, train_loader=None, valid_loader=None, test_loader=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.train_loader = train_loader
+        self.valid_loader = valid_loader
+        self.test_loader = test_loader
+
+    def init_dataloader(self):
+        pass
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser("textnas")
+    parser.add_argument("--batch-size", default=128, type=int)
+    parser.add_argument("--log-frequency", default=50, type=int)
+    parser.add_argument("--seed", default=1234, type=int)
+    parser.add_argument("--epochs", default=10, type=int)
+    parser.add_argument("--lr", default=5e-3, type=float)
+    args = parser.parse_args()
+
+    torch.manual_seed(args.seed)
+    torch.cuda.manual_seed_all(args.seed)
+    np.random.seed(args.seed)
+    random.seed(args.seed)
+    torch.backends.cudnn.deterministic = True
+
+    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+    train_dataset, valid_dataset, test_dataset, embedding = read_data_sst("data")
+    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, num_workers=4, shuffle=True)
+    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=args.batch_size, num_workers=4, shuffle=True)
+    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, num_workers=4)
+    train_loader, valid_loader = cycle(train_loader), cycle(valid_loader)
+    model = Model(embedding)
+
+    mutator = EnasMutator(model, temperature=None, tanh_constant=None, entropy_reduction="mean")
+
+    criterion = nn.CrossEntropyLoss()
+    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, eps=1e-3, weight_decay=2e-6)
+    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs, eta_min=1e-5)
+
+    trainer = TextNASTrainer(model,
+                             loss=criterion,
+                             metrics=lambda output, target: {"acc": accuracy(output, target)},
+                             reward_function=accuracy,
+                             optimizer=optimizer,
+                             callbacks=[LRSchedulerCallback(lr_scheduler)],
+                             batch_size=args.batch_size,
+                             num_epochs=args.epochs,
+                             dataset_train=None,
+                             dataset_valid=None,
+                             train_loader=train_loader,
+                             valid_loader=valid_loader,
+                             test_loader=test_loader,
+                             log_frequency=args.log_frequency,
+                             mutator=mutator,
+                             mutator_lr=2e-3,
+                             mutator_steps=500,
+                             mutator_steps_aggregate=1,
+                             child_steps=3000,
+                             baseline_decay=0.99,
+                             test_arc_per_epoch=10)
+    trainer.train()
+    os.makedirs("checkpoints", exist_ok=True)
+    for i in range(20):
+        trainer.export(os.path.join("checkpoints", "architecture_%02d.json" % i))
diff --git a/examples/nas/textnas/utils.py b/examples/nas/textnas/utils.py
new file mode 100644
index 0000000000..8e49b0cdfb
--- /dev/null
+++ b/examples/nas/textnas/utils.py
@@ -0,0 +1,67 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import logging
+
+import torch
+import torch.nn as nn
+
+INF = 1E10
+EPS = 1E-12
+
+logger = logging.getLogger("nni.textnas")
+
+
+def get_length(mask):
+    """Return the sum of ``mask`` along dim 1, cast to a long (int64) tensor."""
+    row_totals = torch.sum(mask, 1)
+    return row_totals.long()
+
+
+class GlobalAvgPool(nn.Module):
+    """Sum ``x`` over dim 2 and divide by the sum of ``mask`` over dim 1 (plus EPS when it is 0)."""
+    def forward(self, x, mask):
+        total = torch.sum(x, 2)
+        count = torch.sum(mask, 1, keepdim=True).float()
+        count += torch.eq(count, 0.0).float() * EPS  # avoid dividing by an exact zero
+        total /= count.repeat(1, total.size()[1])
+        return total
+
+
+class GlobalMaxPool(nn.Module):
+    """Max over dim 2 of ``x``, with positions where ``mask`` is 0 pushed down by ``INF``."""
+    def forward(self, x, mask):
+        # Build the offset in x's dtype: an in-place Long *= float is rejected by newer PyTorch.
+        pad = torch.eq(mask.float(), 0.0).to(x.dtype) * -INF
+        # Add once and out of place (broadcast over dim 1) so the caller's tensor is not mutated.
+        pooled, _ = torch.max(x + torch.unsqueeze(pad, dim=1), 2)
+        return pooled
+
+
+class IteratorWrapper:
+    """Wrap ``loader`` so that iterating yields each batch as ``((text, mask), label)``."""
+
+    def __init__(self, loader):
+        self.loader = loader
+        self.iterator = None
+
+    def __iter__(self):
+        self.iterator = iter(self.loader)
+        return self
+
+    def __len__(self):
+        return len(self.loader)
+
+    def __next__(self):
+        batch = next(self.iterator)
+        text, length = batch.text
+        label = batch.label - 1  # labels are shifted down by one
+        positions = torch.arange(text.size(1), device=length.device).unsqueeze(0).repeat(label.size(0), 1)
+        mask = positions < length.unsqueeze(-1).repeat(1, text.size(1))  # True where position < length
+        return (text, mask), label
+
+
+def accuracy(output, target):
+    """Fraction of rows where the index of the max of ``output`` over dim 1 equals ``target``."""
+    predicted = torch.max(output.data, 1)[1]
+    return (predicted == target).sum().item() / target.size(0)
diff --git a/src/sdk/pynni/nni/nas/pytorch/enas/mutator.py b/src/sdk/pynni/nni/nas/pytorch/enas/mutator.py
index 1f2f9bd7ad..0a983f0a75 100644
--- a/src/sdk/pynni/nni/nas/pytorch/enas/mutator.py
+++ b/src/sdk/pynni/nni/nas/pytorch/enas/mutator.py
@@ -30,7 +30,7 @@ def forward(self, inputs, hidden):
 class EnasMutator(Mutator):
 
     def __init__(self, model, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, cell_exit_extra_step=False,
-                 skip_target=0.4, branch_bias=0.25):
+                 skip_target=0.4, temperature=None, branch_bias=0.25, entropy_reduction="sum"):
         """
         Initialize a EnasMutator.
 
@@ -49,17 +49,22 @@ def __init__(self, model, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, ce
             and mark it as the hidden state of this MutableScope. This is to align with the original implementation of paper.
         skip_target : float
             Target probability that skipconnect will appear.
+        temperature : float
+            Temperature constant that divides the logits.
         branch_bias : float
             Manual bias applied to make some operations more likely to be chosen.
             Currently this is implemented with a hardcoded match rule that aligns with original repo.
             If a mutable has a ``reduce`` in its key, all its op choices
             that contains `conv` in their typename will receive a bias of ``+self.branch_bias`` initially; while others
             receive a bias of ``-self.branch_bias``.
+        entropy_reduction : str
+            Either ``sum`` or ``mean``. How the log-probability and entropy of each sampled choice are reduced.
         """
         super().__init__(model)
         self.lstm_size = lstm_size
         self.lstm_num_layers = lstm_num_layers
         self.tanh_constant = tanh_constant
+        self.temperature = temperature
         self.cell_exit_extra_step = cell_exit_extra_step
         self.skip_target = skip_target
         self.branch_bias = branch_bias
@@ -70,6 +75,8 @@ def __init__(self, model, lstm_size=64, lstm_num_layers=1, tanh_constant=1.5, ce
         self.v_attn = nn.Linear(self.lstm_size, 1, bias=False)
         self.g_emb = nn.Parameter(torch.randn(1, self.lstm_size) * 0.1)
         self.skip_targets = nn.Parameter(torch.tensor([1.0 - self.skip_target, self.skip_target]), requires_grad=False)  # pylint: disable=not-callable
+        assert entropy_reduction in ["sum", "mean"], "Entropy reduction must be one of sum and mean."
+        self.entropy_reduction = torch.sum if entropy_reduction == "sum" else torch.mean
         self.cross_entropy_loss = nn.CrossEntropyLoss(reduction="none")
         self.bias_dict = nn.ParameterDict()
 
@@ -135,15 +142,17 @@ def _mark_anchor(self, key):
     def _sample_layer_choice(self, mutable):
         self._lstm_next_step()
         logit = self.soft(self._h[-1])
+        if self.temperature is not None:
+            logit /= self.temperature
         if self.tanh_constant is not None:
             logit = self.tanh_constant * torch.tanh(logit)
         if mutable.key in self.bias_dict:
             logit += self.bias_dict[mutable.key]
         branch_id = torch.multinomial(F.softmax(logit, dim=-1), 1).view(-1)
         log_prob = self.cross_entropy_loss(logit, branch_id)
-        self.sample_log_prob += torch.sum(log_prob)
+        self.sample_log_prob += self.entropy_reduction(log_prob)
         entropy = (log_prob * torch.exp(-log_prob)).detach()  # pylint: disable=invalid-unary-operand-type
-        self.sample_entropy += torch.sum(entropy)
+        self.sample_entropy += self.entropy_reduction(entropy)
         self._inputs = self.embedding(branch_id)
         return F.one_hot(branch_id, num_classes=self.max_layer_choice).bool().view(-1)
 
@@ -158,6 +167,8 @@ def _sample_input_choice(self, mutable):
         query = torch.cat(query, 0)
         query = torch.tanh(query + self.attn_query(self._h[-1]))
         query = self.v_attn(query)
+        if self.temperature is not None:
+            query /= self.temperature
         if self.tanh_constant is not None:
             query = self.tanh_constant * torch.tanh(query)
 
@@ -178,7 +189,7 @@ def _sample_input_choice(self, mutable):
             log_prob = self.cross_entropy_loss(logit, index)
             self._inputs = anchors[index.item()]
 
-        self.sample_log_prob += torch.sum(log_prob)
+        self.sample_log_prob += self.entropy_reduction(log_prob)
         entropy = (log_prob * torch.exp(-log_prob)).detach()  # pylint: disable=invalid-unary-operand-type
-        self.sample_entropy += torch.sum(entropy)
+        self.sample_entropy += self.entropy_reduction(entropy)
         return skip.bool()
diff --git a/src/sdk/pynni/nni/nas/pytorch/enas/trainer.py b/src/sdk/pynni/nni/nas/pytorch/enas/trainer.py
index 6cd5924f39..ccafbd7c88 100644
--- a/src/sdk/pynni/nni/nas/pytorch/enas/trainer.py
+++ b/src/sdk/pynni/nni/nas/pytorch/enas/trainer.py
@@ -2,11 +2,14 @@
 # Licensed under the MIT license.
 
 import logging
+from itertools import cycle
+
 import torch
+import torch.nn as nn
 import torch.optim as optim
 
 from nni.nas.pytorch.trainer import Trainer
-from nni.nas.pytorch.utils import AverageMeterGroup
+from nni.nas.pytorch.utils import AverageMeterGroup, to_device
 from .mutator import EnasMutator
 
 logger = logging.getLogger(__name__)
@@ -16,8 +19,9 @@ class EnasTrainer(Trainer):
     def __init__(self, model, loss, metrics, reward_function,
                  optimizer, num_epochs, dataset_train, dataset_valid,
                  mutator=None, batch_size=64, workers=4, device=None, log_frequency=None, callbacks=None,
-                 entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999,
-                 mutator_lr=0.00035, mutator_steps_aggregate=20, mutator_steps=50, aux_weight=0.4):
+                 entropy_weight=0.0001, skip_weight=0.8, baseline_decay=0.999, child_steps=500,
+                 mutator_lr=0.00035, mutator_steps_aggregate=20, mutator_steps=50, aux_weight=0.4,
+                 test_arc_per_epoch=1):
         """
         Initialize an EnasTrainer.
 
@@ -57,6 +61,8 @@ def __init__(self, model, loss, metrics, reward_function,
             Weight of skip penalty loss.
         baseline_decay : float
             Decay factor of baseline. New baseline will be equal to ``baseline_decay * baseline_old + reward * (1 - baseline_decay)``.
+        child_steps : int
+            How many mini-batches for model training per epoch.
         mutator_lr : float
             Learning rate for RL controller.
         mutator_steps_aggregate : int
@@ -65,12 +71,16 @@ def __init__(self, model, loss, metrics, reward_function,
             Number of mini-batches for each epoch of RL controller learning.
         aux_weight : float
             Weight of auxiliary head loss. ``aux_weight * aux_loss`` will be added to total loss.
+        test_arc_per_epoch : int
+            How many architectures are chosen for direct test after each epoch.
         """
         super().__init__(model, mutator if mutator is not None else EnasMutator(model),
                          loss, metrics, optimizer, num_epochs, dataset_train, dataset_valid,
                          batch_size, workers, device, log_frequency, callbacks)
         self.reward_function = reward_function
         self.mutator_optim = optim.Adam(self.mutator.parameters(), lr=mutator_lr)
+        self.batch_size = batch_size
+        self.workers = workers
 
         self.entropy_weight = entropy_weight
         self.skip_weight = skip_weight
@@ -78,32 +88,40 @@ def __init__(self, model, loss, metrics, reward_function,
         self.baseline = 0.
         self.mutator_steps_aggregate = mutator_steps_aggregate
         self.mutator_steps = mutator_steps
+        self.child_steps = child_steps
         self.aux_weight = aux_weight
+        self.test_arc_per_epoch = test_arc_per_epoch
+
+        self.init_dataloader()
 
+    def init_dataloader(self):
         n_train = len(self.dataset_train)
         split = n_train // 10
         indices = list(range(n_train))
         train_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[:-split])
         valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(indices[-split:])
         self.train_loader = torch.utils.data.DataLoader(self.dataset_train,
-                                                        batch_size=batch_size,
+                                                        batch_size=self.batch_size,
                                                         sampler=train_sampler,
-                                                        num_workers=workers)
+                                                        num_workers=self.workers)
         self.valid_loader = torch.utils.data.DataLoader(self.dataset_train,
-                                                        batch_size=batch_size,
+                                                        batch_size=self.batch_size,
                                                         sampler=valid_sampler,
-                                                        num_workers=workers)
+                                                        num_workers=self.workers)
         self.test_loader = torch.utils.data.DataLoader(self.dataset_valid,
-                                                       batch_size=batch_size,
-                                                       num_workers=workers)
+                                                       batch_size=self.batch_size,
+                                                       num_workers=self.workers)
+        self.train_loader = cycle(self.train_loader)
+        self.valid_loader = cycle(self.valid_loader)
 
     def train_one_epoch(self, epoch):
         # Sample model and train
         self.model.train()
         self.mutator.eval()
         meters = AverageMeterGroup()
-        for step, (x, y) in enumerate(self.train_loader):
-            x, y = x.to(self.device), y.to(self.device)
+        for step in range(1, self.child_steps + 1):
+            x, y = next(self.train_loader)
+            x, y = to_device(x, self.device), to_device(y, self.device)
             self.optimizer.zero_grad()
 
             with torch.no_grad():
@@ -119,55 +137,71 @@ def train_one_epoch(self, epoch):
             loss = self.loss(logits, y)
             loss = loss + self.aux_weight * aux_loss
             loss.backward()
+            nn.utils.clip_grad_norm_(self.model.parameters(), 5.)
             self.optimizer.step()
             metrics["loss"] = loss.item()
             meters.update(metrics)
 
             if self.log_frequency is not None and step % self.log_frequency == 0:
-                logger.info("Model Epoch [%s/%s] Step [%s/%s]  %s", epoch + 1,
-                            self.num_epochs, step + 1, len(self.train_loader), meters)
+                logger.info("Model Epoch [%d/%d] Step [%d/%d]  %s", epoch + 1,
+                            self.num_epochs, step, self.child_steps, meters)
 
         # Train sampler (mutator)
         self.model.eval()
         self.mutator.train()
         meters = AverageMeterGroup()
-        mutator_step, total_mutator_steps = 0, self.mutator_steps * self.mutator_steps_aggregate
-        while mutator_step < total_mutator_steps:
-            for step, (x, y) in enumerate(self.valid_loader):
-                x, y = x.to(self.device), y.to(self.device)
+        for mutator_step in range(1, self.mutator_steps + 1):
+            self.mutator_optim.zero_grad()
+            for step in range(1, self.mutator_steps_aggregate + 1):
+                x, y = next(self.valid_loader)
+                x, y = to_device(x, self.device), to_device(y, self.device)
 
                 self.mutator.reset()
                 with torch.no_grad():
                     logits = self.model(x)
                 metrics = self.metrics(logits, y)
                 reward = self.reward_function(logits, y)
-                if self.entropy_weight is not None:
-                    reward += self.entropy_weight * self.mutator.sample_entropy
+                if self.entropy_weight:
+                    reward += self.entropy_weight * self.mutator.sample_entropy.item()
                 self.baseline = self.baseline * self.baseline_decay + reward * (1 - self.baseline_decay)
-                self.baseline = self.baseline.detach().item()
                 loss = self.mutator.sample_log_prob * (reward - self.baseline)
                 if self.skip_weight:
                     loss += self.skip_weight * self.mutator.sample_skip_penalty
                 metrics["reward"] = reward
                 metrics["loss"] = loss.item()
                 metrics["ent"] = self.mutator.sample_entropy.item()
+                metrics["log_prob"] = self.mutator.sample_log_prob.item()
                 metrics["baseline"] = self.baseline
                 metrics["skip"] = self.mutator.sample_skip_penalty
 
-                loss = loss / self.mutator_steps_aggregate
+                loss /= self.mutator_steps_aggregate
                 loss.backward()
                 meters.update(metrics)
 
-                if mutator_step % self.mutator_steps_aggregate == 0:
-                    self.mutator_optim.step()
-                    self.mutator_optim.zero_grad()
+                cur_step = step + (mutator_step - 1) * self.mutator_steps_aggregate
+                if self.log_frequency is not None and cur_step % self.log_frequency == 0:
+                    logger.info("RL Epoch [%d/%d] Step [%d/%d] [%d/%d]  %s", epoch + 1, self.num_epochs,
+                                mutator_step, self.mutator_steps, step, self.mutator_steps_aggregate,
+                                meters)
 
-                if self.log_frequency is not None and step % self.log_frequency == 0:
-                    logger.info("RL Epoch [%s/%s] Step [%s/%s]  %s", epoch + 1, self.num_epochs,
-                                mutator_step // self.mutator_steps_aggregate + 1, self.mutator_steps, meters)
-                mutator_step += 1
-                if mutator_step >= total_mutator_steps:
-                    break
+            nn.utils.clip_grad_norm_(self.mutator.parameters(), 5.)
+            self.mutator_optim.step()
 
     def validate_one_epoch(self, epoch):
-        pass
+        with torch.no_grad():
+            for arc_id in range(self.test_arc_per_epoch):
+                meters = AverageMeterGroup()
+                for x, y in self.test_loader:
+                    x, y = to_device(x, self.device), to_device(y, self.device)
+                    self.mutator.reset()
+                    logits = self.model(x)
+                    if isinstance(logits, tuple):
+                        logits, _ = logits
+                    metrics = self.metrics(logits, y)
+                    loss = self.loss(logits, y)
+                    metrics["loss"] = loss.item()
+                    meters.update(metrics)
+
+                logger.info("Test Epoch [%d/%d] Arc [%d/%d] Summary  %s",
+                            epoch + 1, self.num_epochs, arc_id + 1, self.test_arc_per_epoch,
+                            meters.summary())
diff --git a/src/sdk/pynni/nni/nas/pytorch/mutables.py b/src/sdk/pynni/nni/nas/pytorch/mutables.py
index 780947d9f8..778f247d0d 100644
--- a/src/sdk/pynni/nni/nas/pytorch/mutables.py
+++ b/src/sdk/pynni/nni/nas/pytorch/mutables.py
@@ -159,7 +159,7 @@ def __init__(self, n_candidates=None, choose_from=None, n_chosen=None,
                                                                   "than number of candidates."
 
         self.n_candidates = n_candidates
-        self.choose_from = choose_from
+        self.choose_from = choose_from.copy()
         self.n_chosen = n_chosen
         self.reduction = reduction
         self.return_mask = return_mask
diff --git a/src/sdk/pynni/nni/nas/pytorch/trainer.py b/src/sdk/pynni/nni/nas/pytorch/trainer.py
index 218d6a2d50..32888d9bf9 100644
--- a/src/sdk/pynni/nni/nas/pytorch/trainer.py
+++ b/src/sdk/pynni/nni/nas/pytorch/trainer.py
@@ -96,12 +96,12 @@ def train(self, validate=True):
                 callback.on_epoch_begin(epoch)
 
             # training
-            _logger.info("Epoch %d Training", epoch)
+            _logger.info("Epoch %d Training", epoch + 1)
             self.train_one_epoch(epoch)
 
             if validate:
                 # validation
-                _logger.info("Epoch %d Validating", epoch)
+                _logger.info("Epoch %d Validating", epoch + 1)
                 self.validate_one_epoch(epoch)
 
             for callback in self.callbacks:
diff --git a/src/sdk/pynni/nni/nas/pytorch/utils.py b/src/sdk/pynni/nni/nas/pytorch/utils.py
index 71cd2f8a0a..007c28a902 100644
--- a/src/sdk/pynni/nni/nas/pytorch/utils.py
+++ b/src/sdk/pynni/nni/nas/pytorch/utils.py
@@ -4,6 +4,8 @@
 import logging
 from collections import OrderedDict
 
+import torch
+
 _counter = 0
 
 _logger = logging.getLogger(__name__)
@@ -15,7 +17,22 @@ def global_mutable_counting():
     return _counter
 
 
+def to_device(obj, device):
+    """Recursively move tensors nested in tuples, lists and dicts to ``device``."""
+    if torch.is_tensor(obj):
+        return obj.to(device)
+    if isinstance(obj, (int, float, str)):
+        return obj  # plain numbers and strings are returned unchanged
+    if isinstance(obj, dict):
+        return {key: to_device(val, device) for key, val in obj.items()}
+    if isinstance(obj, (tuple, list)):
+        moved = [to_device(item, device) for item in obj]
+        return tuple(moved) if isinstance(obj, tuple) else moved
+    raise ValueError("'%s' has unsupported type '%s'" % (obj, type(obj)))
+
+
 class AverageMeterGroup:
+    """Average meter group for multiple average meters"""
 
     def __init__(self):
         self.meters = OrderedDict()
@@ -33,7 +50,10 @@ def __getitem__(self, item):
         return self.meters[item]
 
     def __str__(self):
-        return "  ".join(str(v) for _, v in self.meters.items())
+        return "  ".join(str(v) for v in self.meters.values())
+
+    def summary(self):
+        return "  ".join(v.summary() for v in self.meters.values())
 
 
 class AverageMeter:
@@ -72,6 +92,10 @@ def __str__(self):
         fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
         return fmtstr.format(**self.__dict__)
 
+    def summary(self):
+        fmtstr = '{name}: {avg' + self.fmt + '}'
+        return fmtstr.format(**self.__dict__)
+
 
 class StructuredMutableTreeNode:
     """

From ae81ec47ea654c819ccddae1f7e1ff0465cbd2ba Mon Sep 17 00:00:00 2001
From: Chi Song <27178119+squirrelsc@users.noreply.github.com>
Date: Mon, 6 Jan 2020 13:33:13 +0800
Subject: [PATCH 19/23] update document title for SEO (#1920)

---
 docs/en_US/conf.py   | 10 ++++++----
 docs/en_US/index.rst |  6 +++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/docs/en_US/conf.py b/docs/en_US/conf.py
index d7cd21b58c..73286ad0a1 100644
--- a/docs/en_US/conf.py
+++ b/docs/en_US/conf.py
@@ -12,16 +12,16 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
+from recommonmark.transform import AutoStructify
+from recommonmark.parser import CommonMarkParser
 import os
 import sys
 sys.path.insert(0, os.path.abspath('../../src/sdk/pynni'))
 
-from recommonmark.parser import CommonMarkParser
-from recommonmark.transform import AutoStructify
 
 # -- Project information ---------------------------------------------------
 
-project = 'Neural Network Intelligence'
+project = 'NNI'
 copyright = '2020, Microsoft'
 author = 'Microsoft'
 
@@ -109,6 +109,8 @@
 # html_sidebars = {}
 
 html_logo = '../img/nni_logo_dark.png'
+html_title = 'An open source AutoML toolkit for neural architecture search, model compression and hyper-parameter tuning (%s %s)' % \
+    (project, release)
 
 # -- Options for HTMLHelp output ---------------------------------------------
 
@@ -190,6 +192,6 @@ def setup(app):
     app.add_config_value('recommonmark_config', {
         'enable_eval_rst': True,
         'enable_auto_toc_tree': False,
-            }, True)
+    }, True)
     app.add_transform(AutoStructify)
     app.add_stylesheet('css/custom.css')
diff --git a/docs/en_US/index.rst b/docs/en_US/index.rst
index 0cc3499f0d..54d84c6e38 100644
--- a/docs/en_US/index.rst
+++ b/docs/en_US/index.rst
@@ -1,6 +1,6 @@
-#########################################
-Neural Network Intelligence Documentation
-#########################################
+###########################
+Neural Network Intelligence
+###########################
 
 ********
 Contents

From 226aaef9f43c24df758df9791568d9c49f4c5a6e Mon Sep 17 00:00:00 2001
From: Chi Song <27178119+squirrelsc@users.noreply.github.com>
Date: Tue, 7 Jan 2020 11:19:30 +0800
Subject: [PATCH 20/23] Chinese Translation (#1914)

---
 README.Makefile_zh_CN.md                      |   2 +-
 README_zh_CN.md                               |  11 +-
 docs/zh_CN/AdvancedFeature/AdvancedNas.md     | 104 --------
 .../AdvancedFeature/GeneralNasInterfaces.md   | 237 ------------------
 docs/zh_CN/Assessor/BuiltinAssessor.md        |   8 +-
 docs/zh_CN/Assessor/CustomizeAssessor.md      |   4 +-
 docs/zh_CN/Compressor/Overview.md             | 141 ++++++++---
 docs/zh_CN/Compressor/Pruner.md               | 159 ++++++++++--
 docs/zh_CN/Compressor/Quantizer.md            |  87 +++++--
 .../{L1FilterPruner.md => l1filterpruner.md}  |  24 +-
 docs/zh_CN/FeatureEngineering/Overview.md     |  13 +-
 docs/zh_CN/NAS/Overview.md                    |   7 +
 docs/zh_CN/NAS/PDARTS.md                      |  18 ++
 docs/zh_CN/NAS/SPOS.md                        | 118 +++++++++
 docs/zh_CN/Overview.md                        |   2 +-
 docs/zh_CN/Release.md                         |  16 +-
 .../FrameworkControllerMode.md                |   4 +-
 docs/zh_CN/TrainingService/LocalMode.md       |   8 +-
 docs/zh_CN/TrainingService/PaiMode.md         |  67 +----
 docs/zh_CN/TrainingService/PaiYarnMode.md     | 128 ++++++++++
 docs/zh_CN/TrialExample/Cifar10Examples.md    |  12 +-
 docs/zh_CN/TrialExample/GbdtExample.md        |   2 +-
 docs/zh_CN/TrialExample/MnistExamples.md      |  20 +-
 docs/zh_CN/TrialExample/SklearnExamples.md    |   6 +-
 .../TrialExample/SquadEvolutionExamples.md    |  10 +-
 docs/zh_CN/TrialExample/Trials.md             |   8 +-
 docs/zh_CN/Tuner/BuiltinTuner.md              |  21 +-
 docs/zh_CN/Tuner/CustomizeAdvisor.md          |   4 +-
 docs/zh_CN/Tuner/CustomizeTuner.md            |   6 +-
 docs/zh_CN/Tuner/HyperbandAdvisor.md          |   6 +-
 docs/zh_CN/Tuner/NetworkmorphismTuner.md      |   6 +-
 docs/zh_CN/Tutorial/AnnotationSpec.md         |   6 +-
 docs/zh_CN/Tutorial/ExperimentConfig.md       |   2 +-
 docs/zh_CN/Tutorial/Nnictl.md                 |   8 +-
 docs/zh_CN/Tutorial/QuickStart.md             |   2 +-
 docs/zh_CN/Tutorial/SearchSpaceSpec.md        |   4 +-
 docs/zh_CN/conf.py                            |  14 +-
 docs/zh_CN/examples.rst                       |   2 +-
 docs/zh_CN/index.rst                          |   6 +-
 examples/assessors/README_zh_CN.md            |   2 +-
 examples/nas/darts/README_zh_CN.md            |   1 +
 examples/nas/enas/README_zh_CN.md             |   1 +
 examples/nas/naive/README_zh_CN.md            |   1 +
 examples/nas/pdarts/README_zh_CN.md           |   1 +
 examples/nas/spos/README_zh_CN.md             |   1 +
 examples/nas/textnas/README_zh_CN.md          |  45 ++++
 examples/trials/README_zh_CN.md               |   4 +-
 .../trials/cifar10_pytorch/README_zh_CN.md    |   4 +-
 examples/trials/ga_squad/README_zh_CN.md      |  12 +-
 .../trials/kaggle-tgs-salt/README_zh_CN.md    |   2 +-
 .../trials/network_morphism/README_zh_CN.md   |  14 +-
 tools/nni_annotation/README_zh_CN.md          |   6 +-
 52 files changed, 790 insertions(+), 607 deletions(-)
 delete mode 100644 docs/zh_CN/AdvancedFeature/AdvancedNas.md
 delete mode 100644 docs/zh_CN/AdvancedFeature/GeneralNasInterfaces.md
 rename docs/zh_CN/Compressor/{L1FilterPruner.md => l1filterpruner.md} (66%)
 create mode 100644 docs/zh_CN/NAS/PDARTS.md
 create mode 100644 docs/zh_CN/NAS/SPOS.md
 create mode 100644 docs/zh_CN/TrainingService/PaiYarnMode.md
 create mode 100644 examples/nas/darts/README_zh_CN.md
 create mode 100644 examples/nas/enas/README_zh_CN.md
 create mode 100644 examples/nas/naive/README_zh_CN.md
 create mode 100644 examples/nas/pdarts/README_zh_CN.md
 create mode 100644 examples/nas/spos/README_zh_CN.md
 create mode 100644 examples/nas/textnas/README_zh_CN.md

diff --git a/README.Makefile_zh_CN.md b/README.Makefile_zh_CN.md
index 7f80c04511..c1ac4bd375 100644
--- a/README.Makefile_zh_CN.md
+++ b/README.Makefile_zh_CN.md
@@ -69,7 +69,7 @@ Python 包使用 setuptools 安装，所以安装路径依赖于 Python 配置
 | ------------------ | ------------------------------------------ |
 | `easy-install`     | 安装依赖项，生成，安装 NNI，并编辑 `~/.bashrc`            |
 | `dev-easy-install` | 安装依赖项，生成，将 NNI 作为符号链接来安装，并编辑 `~/.bashrc`   |
-| `install`          | 安装 Python 包，Node.js 模块，NNI 脚本和样例           |
+| `install`          | 安装 Python 包，Node.js 模块，NNI 脚本和示例           |
 | `dev-install`      | 将 Python 和 Node.js 模块作为符号链接安装，然后安装 scripts |
 | `pip-install`      | 安装依赖项，生成，安装 NNI，但不安装 Python 包              |
 
diff --git a/README_zh_CN.md b/README_zh_CN.md
index 0807cf1c75..ec77fcbd50 100644
--- a/README_zh_CN.md
+++ b/README_zh_CN.md
@@ -19,7 +19,7 @@ NNI 管理自动机器学习 (AutoML) 的 Experiment，**调度运行**由调优
 * 想要更容易**实现或试验新的自动机器学习算法**的研究员或数据科学家，包括：超参调优算法，神经网络搜索算法以及模型压缩算法。
 * 在机器学习平台中**支持自动机器学习**。
 
-### **NNI v1.2 已发布！ &nbsp;[<img width="48" src="docs/img/release_icon.png" />](#nni-released-reminder)**
+### **NNI v1.3 已发布！ &nbsp;[<img width="48" src="docs/img/release_icon.png" />](#nni-released-reminder)**
 
 ## **NNI 功能一览**
 
@@ -224,7 +224,7 @@ Linux 和 MacOS
 * 在 `python >= 3.5` 的环境中运行命令： `git` 和 `wget`，确保安装了这两个组件。
 
 ```bash
-    git clone -b v1.2 https://github.com/Microsoft/nni.git
+    git clone -b v1.3 https://github.com/Microsoft/nni.git
     cd nni
     source install.sh
 ```
@@ -234,7 +234,7 @@ Windows
 * 在 `python >=3.5` 的环境中运行命令： `git` 和 `PowerShell`，确保安装了这两个组件。
 
 ```bash
-  git clone -b v1.2 https://github.com/Microsoft/nni.git
+  git clone -b v1.3 https://github.com/Microsoft/nni.git
   cd nni
   powershell -ExecutionPolicy Bypass -file install.ps1
 ```
@@ -250,7 +250,7 @@ Windows 上参考 [Windows 上使用 NNI](docs/zh_CN/Tutorial/NniOnWindows.md)
 * 通过克隆源代码下载示例。
 
 ```bash
-    git clone -b v1.2 https://github.com/Microsoft/nni.git
+    git clone -b v1.3 https://github.com/Microsoft/nni.git
 ```
 
 Linux 和 MacOS
@@ -326,7 +326,7 @@ You can use these commands to get more information about the experiment
 * [如何调试](docs/zh_CN/Tutorial/HowToDebug.md)
 * [自定义 Tuner](docs/zh_CN/Tuner/CustomizeTuner.md)
 * [实现定制的训练平台](docs/zh_CN/TrainingService/HowToImplementTrainingService.md)
-* [在 NNI 上实现新的 NAS Trainer](https://github.com/microsoft/nni/blob/master/docs/en_US/NAS/NasInterface.md#implement-a-new-nas-trainer-on-nni)
+* [在 NNI 上实现新的 NAS Trainer](https://github.com/microsoft/nni/blob/master/docs/zh_CN/NAS/NasInterface.md#implement-a-new-nas-trainer-on-nni)
 * [自定义 Advisor](docs/zh_CN/Tuner/CustomizeAdvisor.md)
 
 ## **其它代码库和参考**
@@ -349,6 +349,7 @@ You can use these commands to get more information about the experiment
    * [使用 NNI 为 SPTAG 自动调参](docs/zh_CN/CommunitySharings/SptagAutoTune.md)
    * [使用 NNI 为 scikit-learn 查找超参](https://towardsdatascience.com/find-thy-hyper-parameters-for-scikit-learn-pipelines-using-microsoft-nni-f1015b1224c1)
    * **博客** - [AutoML 工具（Advisor，NNI 与 Google Vizier）的对比](http://gaocegege.com/Blog/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/katib-new#%E6%80%BB%E7%BB%93%E4%B8%8E%E5%88%86%E6%9E%90) 作者：[@gaocegege](https://github.com/gaocegege) - kubeflow/katib 的设计与实现的总结与分析章节
+   * **博客** - [NNI 2019 新功能汇总](https://mp.weixin.qq.com/s/7_KRT-rRojQbNuJzkjFMuA) 作者：@squirrelsc
 
 ## **反馈**
 
diff --git a/docs/zh_CN/AdvancedFeature/AdvancedNas.md b/docs/zh_CN/AdvancedFeature/AdvancedNas.md
deleted file mode 100644
index a36f0e509d..0000000000
--- a/docs/zh_CN/AdvancedFeature/AdvancedNas.md
+++ /dev/null
@@ -1,104 +0,0 @@
-# 高级神经网络架构搜索教程
-
-目前，许多 NAS（Neural Architecture Search，神经网络架构搜索）算法都在 Trial 上使用了 **权重共享（weight sharing）** 的方法来加速训练过程。 例如，[ENAS](https://arxiv.org/abs/1802.03268) 与以前的 [NASNet](https://arxiv.org/abs/1707.07012) 算法相比，通过'*子模型间的参数共享（parameter sharing between child models）*'提高了 1000 倍的效率。 而例如 [DARTS](https://arxiv.org/abs/1806.09055), [Network Morphism](https://arxiv.org/abs/1806.10282), 和 [Evolution](https://arxiv.org/abs/1703.01041) 等算法也利用或者隐式的利用了权重共享。
-
-本教程介绍了如何使用权重共享。
-
-## 权重共享
-
-推荐通过 NFS （Network File System）进行权重共享，它是轻量、相对高效的多机共享文件方案。 欢迎社区来共享更多高效的技术。
-
-### 通过 NFS 文件使用权重共享
-
-使用 NFS 配置（见下文），Trial 代码可以通过读写文件来共享模型权重。 建议使用 Tuner 的存储路径：
-
-```yaml
-tuner:
-  codeDir: path/to/customer_tuner
-  classFileName: customer_tuner.py
-  className: CustomerTuner
-  classArgs:
-    ...
-    save_dir_root: /nfs/storage/path/
-```
-
-并让 Tuner 来决定在什么路径读写权重文件，通过 `nni.get_next_parameters()` 来获取路径：
-
-<img src="https://user-images.githubusercontent.com/23273522/51817667-93ebf080-2306-11e9-8395-b18b322062bc.png" alt="drawing" width="700" />
-
-例如，在 Tensorflow 中：
-
-```python
-# 保存 models
-saver = tf.train.Saver()
-saver.save(sess, os.path.join(params['save_path'], 'model.ckpt'))
-# 读取 models
-tf.init_from_checkpoint(params['restore_path'])
-```
-
-超参中的 `'save_path'` 和 `'restore_path'` 可以通过 Tuner 来管理。
-
-### NFS 配置
-
-NFS 使用了客户端/服务器架构。通过一个 NFS 服务器来提供物理存储，远程计算机上的 Trial 使用 NFS 客户端来读写文件，操作上和本地文件相同。
-
-#### NFS 服务器
-
-如果有足够的存储空间，并能够让 NNI 的 Trial 通过**远程机器**来连接，NFS 服务可以安装在任何计算机上。 通常，可以选择一台远程服务器作为 NFS 服务。
-
-在 Ubuntu 上，可通过 `apt-get` 安装 NFS 服务：
-
-```bash
-sudo apt-get install nfs-kernel-server
-```
-
-假设 `/tmp/nni/shared` 是物理存储位置，然后运行：
-
-```bash
-mkdir -p /tmp/nni/shared
-sudo echo "/tmp/nni/shared *(rw,sync,no_subtree_check,no_root_squash)" >> /etc/exports
-sudo service nfs-kernel-server restart
-```
-
-可以通过命令 `sudo showmount -e localhost` 来检查上述目录是否通过 NFS 成功导出了
-
-#### NFS 客户端
-
-为了通过 NFS 访问远程共享文件，需要安装 NFS 客户端。 例如，在 Ubuntu 上运行：
-
-```bash
-sudo apt-get install nfs-common
-```
-
-然后创建并装载上共享目录：
-
-```bash
-mkdir -p /mnt/nfs/nni/
-sudo mount -t nfs 10.10.10.10:/tmp/nni/shared /mnt/nfs/nni
-```
-
-实际使用时，IP `10.10.10.10` 需要替换为 NFS 服务器的真实地址。
-
-## Trial 依赖控制的异步 Dispatcher 模式
-
-多机间启用权重的 Trial，一般是通过**先写后读**的方式来保持一致性。 子节点在父节点的 Trial 完成训练前，不应该读取父节点模型。 要解决这个问题，要通过 `multiThread: true` 来启用**异步调度模式**。在 `config.yml` 中，每次收到 `NEW_TRIAL` 请求，分派一个新的 Trial 时，Tuner 线程可以决定是否阻塞当前线程。 例如：
-
-```python
-    def generate_parameters(self, parameter_id):
-        self.thread_lock.acquire()
-        indiv = # 新 Trial 的配置
-        self.events[parameter_id] = threading.Event()
-        self.thread_lock.release()
-        if indiv.parent_id is not None:
-            self.events[indiv.parent_id].wait()
-
-    def receive_trial_result(self, parameter_id, parameters, reward):
-        self.thread_lock.acquire()
-        # 处理 Trial 结果的配置
-        self.thread_lock.release()
-        self.events[parameter_id].set()
-```
-
-## 样例
-
-详细内容参考：[简单的参数共享样例](https://github.com/Microsoft/nni/tree/master/test/async_sharing_test)。 基于已有的 [ga_squad](https://github.com/Microsoft/nni/tree/master/examples/trials/ga_squad) 样例，还提供了新的 [样例](https://github.com/Microsoft/nni/tree/master/examples/trials/weight_sharing/ga_squad)。
\ No newline at end of file
diff --git a/docs/zh_CN/AdvancedFeature/GeneralNasInterfaces.md b/docs/zh_CN/AdvancedFeature/GeneralNasInterfaces.md
deleted file mode 100644
index c002614c27..0000000000
--- a/docs/zh_CN/AdvancedFeature/GeneralNasInterfaces.md
+++ /dev/null
@@ -1,237 +0,0 @@
-# 神经网络架构搜索的 NNI 编程接口（NAS）
-
-** 这是**实验性的功能**。 目前，仅实现了通用的 NAS 编程接口。 在随后的版本中会支持权重共享。*
-
-自动化的神经网络架构（NAS）搜索在寻找更好的模型方面发挥着越来越重要的作用。 最近的研究工作证明了自动化 NAS 的可行性，并发现了一些超越手动设计和调整的模型。 代表算法有 [NASNet](https://arxiv.org/abs/1707.07012)，[ENAS](https://arxiv.org/abs/1802.03268)，[DARTS](https://arxiv.org/abs/1806.09055)，[Network Morphism](https://arxiv.org/abs/1806.10282)，以及 [Evolution](https://arxiv.org/abs/1703.01041) 等。 新的算法还在不断涌现。 然而，实现这些算法需要很大的工作量，且很难重用其它算法的代码库来实现。
-
-要促进 NAS 创新（例如，设计实现新的 NAS 模型，并列比较不同的 NAS 模型），易于使用且灵活的编程接口非常重要。
-
-<a name="ProgInterface"></a>
-
-## 编程接口
-
-在两种场景下需要用于设计和搜索模型的新的编程接口。 1) 在设计神经网络时，层、子模型或连接有多个可能，并且不确定哪一个或哪种组合表现最好。 如果有一种简单的方法来表达想要尝试的候选层、子模型，将会很有价值。 2) 研究自动化 NAS 时，需要统一的方式来表达神经网络架构的搜索空间， 并在不改变 Trial 代码的情况下来使用不同的搜索算法。
-
-本文基于 [NNI Annotation](../Tutorial/AnnotationSpec.md) 实现了简单灵活的编程接口 。 通过以下示例来详细说明。
-
-### 示例：为层选择运算符
-
-在设计此模型时，第四层的运算符有多个可能的选择，会让模型有更好的表现。 如图所示，在模型代码中可以对第四层使用 Annotation。 此 Annotation 中，共有五个字段：
-
-![](../../img/example_layerchoice.png)
-
-* **layer_choice**：它是函数调用的 list，每个函数都要在代码或导入的库中实现。 函数的输入参数格式为：`def XXX (input, arg2, arg3, ...)`，其中输入是包含了两个元素的 list。 其中一个是 `fixed_inputs` 的 list，另一个是 `optional_inputs` 中选择输入的 list。 `conv` 和 `pool` 是函数示例。 对于 list 中的函数调用，无需写出第一个参数（即 input）。 注意，只会从这些函数调用中选择一个来执行。
-* **fixed_inputs** ：它是变量的 list，可以是前一层输出的张量。 也可以是此层之前的另一个 `nni.mutable_layer` 的 `layer_output`，或此层之前的其它 Python 变量。 list 中的所有变量将被输入 `layer_choice` 中选择的函数（作为输入 list 的第一个元素）。
-* **optional_inputs** ：它是变量的 list，可以是前一层的输出张量。 也可以是此层之前的另一个 `nni.mutable_layer` 的 `layer_output`，或此层之前的其它 Python 变量。 只有 `optional_input_size` 变量被输入 `layer_choice` 到所选的函数 （作为输入 list 的第二个元素）。
-* **optional_input_size** ：它表示从 `input_candidates` 中选择多少个输入。 它可以是一个数字，也可以是一个范围。 范围 [1, 3] 表示选择 1、2 或 3 个输入。
-* **layer_output** ：表示输出的名称。本例中，表示 `layer_choice` 选择的函数的返回值。 这是一个变量名，可以在随后的 Python 代码或 `nni.mutable_layer` 中使用。
-
-此示例有两种写 Annotation 的方法。 对于上面的示例，输入函数的形式是 `[[], [out3]]` 。 对于下面的示例，输入的形式是 `[[out3], []]`。
-
-**调试**：`nnictl trial codegen` 命令可帮助调试 NAS 编程接口。 如果 Experiment `YYY` 中的 Trial 的 `XXX` 出错了，可以运行 `nnictl trial codegen YYY --trial_id XXX` 在当前目录下生成这个 Trial 的可执行代码。 通过运行此代码，可以不需要 NNI 就能调试 Trial 失败的原因。 此命令会编译 Trial 代码，并用实际选择的层次和输入来替换 NNI 的 NAS 代码。
-
-### 示例：为层选择输入的连接
-
-设计层的连接对于制作高性能模型至关重要。 通过此接口，可选择一个层可以采用哪些连接来作为输入。 可以从一组连接中选择几个。 下面的示例从三个候选输入中为 `concat` 这个函数选择两个输入 。 `concat` 还会使用 `fixed_inputs` 获取其上一层的输出 。
-
-![](../../img/example_connectchoice.png)
-
-### 示例：同时选择运算符和连接
-
-此示例从三个运算符中选择一个，并为其选择两个连接作为输入。 由于输入会有多个变量,，在函数的开头需要调用 `concat` 。
-
-![](../../img/example_combined.png)
-
-### 示例：[ENAS](https://arxiv.org/abs/1802.03268) 宏搜索空间
-
-为了证明编程接口带来的便利，使用该接口来实现 “ENAS + 宏搜索空间” 的 Trial 代码。 左图是 ENAS 论文中的宏搜索空间。
-
-![](../../img/example_enas.png)
-
-## 统一的 NAS 搜索空间说明
-
-通过上面的 Annotation 更新 Trial 代码后，即在代码中隐式指定了神经网络架构的搜索空间。 基于该代码，NNI 将自动生成一个搜索空间文件，可作为调优算法的输入。 搜索空间文件遵循以下 JSON 格式。
-
-```javascript
-{
-    "mutable_1": {
-        "_type": "mutable_layer",
-        "_value": {
-            "layer_1": {
-                "layer_choice": ["conv(ch=128)", "pool", "identity"],
-                "optional_inputs": ["out1", "out2", "out3"],
-                "optional_input_size": 2
-            },
-            "layer_2": {
-                ...
-            }
-        }
-    }
-}
-```
-
-相应生成的神经网络结构（由调优算法生成）如下：
-
-```javascript
-{
-    "mutable_1": {
-        "layer_1": {
-            "chosen_layer": "pool",
-            "chosen_inputs": ["out1", "out3"]
-        },
-        "layer_2": {
-            ...
-        }
-    }
-}
-```
-
-通过对搜索空间格式和体系结构选择 (choice) 表达式的说明，可以自由地在 NNI 上实现神经体系结构搜索的各种或通用的调优算法。 接下来的工作会提供一个通用的 NAS 算法。
-
-## 支持 One-Shot NAS
-
-One-Shot NAS 是流行的，能在有限的时间和资源预算内找到较好的神经网络结构的方法。 本质上，它会基于搜索空间来构建完整的图，并使用梯度下降最终找到最佳子图。 它有不同的训练方法，如：[training subgraphs (per mini-batch)](https://arxiv.org/abs/1802.03268) ，[training full graph through dropout](http://proceedings.mlr.press/v80/bender18a/bender18a.pdf)，以及 [training with architecture weights (regularization)](https://arxiv.org/abs/1806.09055) 。
-
-如上所示，NNI 支持通用的 NAS。 从用户角度来看，One-Shot NAS 和 NAS 具有相同的搜索空间规范，因此，它们可以使用相同的编程接口，只是在训练模式上有所不同。 NNI 提供了四种训练模式：
-
-***classic_mode***: [上文](#ProgInterface)对此模式有相应的描述，每个子图是一个 Trial 任务。 要使用此模式，需要启用 NNI Annotation，并在 Experiment 配置文件中为 NAS 指定一个 Tuner。 [这里](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas)是如何实现 Trial 和配置文件的例子。 [这里](https://github.com/microsoft/nni/tree/master/examples/tuners/random_nas_tuner)是一个简单的 NAS Tuner。
-
-***enas_mode***: 参考 [ENAS 论文](https://arxiv.org/abs/1802.03268)的训练方法。 它基于神经网络架构搜索空间来构建全图，每个 mini-batch 只激活一个子图。 [详细说明](#ENASMode)。 （当前仅支持 TensorFlow）。
-
-要使用 enas_mode，需要在配置的 `trial` 部分增加如下字段。
-
-```diff
-trial:
-    command: 运行 Trial 的命令
-    codeDir: Trial 代码的目录
-    gpuNum: 每个 Trial 所需要的 GPU 数量
-
-+   #choice: classic_mode, enas_mode, oneshot_mode
-+   nasMode: enas_mode
-```
-
-与 classic_mode 类似，在 enas_mode 中，需要为 NAS 指定 Tuner，其会从 Tuner（或者论文中的术语：Controller）中接收子图。 由于 Trial 任务要从 Tuner 中接收多个子图，每个子图用于一个 mini-batch，需要在 Trial 代码中增加两行来接收下一个子图（`nni.training_update`），并返回当前子图的结果。 示例如下：
-
-```python
-for _ in range(num):
-    # 接收并启用一个新的子图
-    """@nni.training_update(tf=tf, session=self.session)"""
-    loss, _ = self.session.run([loss_op, train_op])
-    # 返回这个 mini-batch 的损失值
-    """@nni.report_final_result(loss)"""
-```
-
-在这里，`nni.training_update`用来在全图上进行更新。 在 enas_mode 中，更新表示接收一个子图，并在下一个 mini-batch 中启用它。 在 darts_mode 中，更新表示训练架构权重（参考 darts_mode 中的详细说明）。 在 enas_mode 中，需要将导入的 TensorFlow 包传入 `tf`，并将会话传入 `session`。
-
-***oneshot_mode***: 遵循[论文](http://proceedings.mlr.press/v80/bender18a/bender18a.pdf)中的训练方法。 与 enas_mode 通过训练大量子图来训练全图有所不同，oneshot_mode 中构建了全图，并将 dropout 添加到候选的输入以及候选的输出操作中。 然后像其它深度学习模型一样进行训练。 [详细说明](#OneshotMode)。 （当前仅支持 TensorFlow）。
-
-要使用 oneshot_mode，需要在配置的 `trial` 部分增加如下字段。 在此模式中，不需要使用 Tuner，只需要在配置文件中添加任意一个Tuner。 此外，也不需要增加 `nni.training_update`，因为在训练过程中不需要更新。
-
-```diff
-trial:
-    command: 运行 Trial 的命令
-    codeDir: Trial 代码的目录
-    gpuNum: 每个 Trial 所需要的 GPU 数量
-
-+   #choice: classic_mode, enas_mode, oneshot_mode
-+   nasMode: oneshot_mode
-```
-
-***darts_mode***: 参考 [论文](https://arxiv.org/abs/1806.09055)中的训练方法。 与 oneshot_mode 类似。 有两个不同之处，首先 darts_mode 只将架构权重添加到候选操作的输出中，另外是交错的来训练模型权重和架构权重。 [详细说明](#DartsMode)。
-
-要使用 darts_mode，需要在配置的 `trial` 部分增加如下字段。 在此模式中，不需要使用 Tuner，只需要在配置文件中添加任意一个Tuner。
-
-```diff
-trial:
-    command: 运行 Trial 的命令
-    codeDir: Trial 代码的目录
-    gpuNum: 每个 Trial 所需要的 GPU 数量
-
-+   #choice: classic_mode, enas_mode, oneshot_mode
-+   nasMode: darts_mode
-```
-
-在使用 darts_mode 时，需要按照如下所示调用 `nni.training_update`，来更新架构权重。 更新架构权重时，和训练数据一样也需要`损失值`（即, `feed_dict`）。
-
-```python
-for _ in range(num):
-    # 训练架构权重
-    """@nni.training_update(tf=tf, session=self.session, loss=loss, feed_dict=feed_dict)"""
-    loss, _ = self.session.run([loss_op, train_op])
-```
-
-**注意**：对于 enas_mode、oneshot_mode、以及 darts_mode，NNI 仅能在训练阶段时有用。 NNI 不处理它们的推理阶段。 对于 enas_mode，推理阶段需要通过 Controller 来生成新的子图。 对于 oneshot_mode，推理阶段会随机采样生成新的子图，并选择其中好的子图。 对于 darts_mode，推理过程会根据架构权重来修剪掉一些候选的操作。
-
-<a name="ENASMode"></a>
-
-### enas_mode
-
-在 enas_mode 中，编译后的 Trial 代码会构建完整的图形（而不是子图），会接收所选择的架构，并在完整的图形上对此体系结构进行小型的批处理训练，然后再请求另一个架构。 通过 [NNI 多阶段 Experiment](./MultiPhase.md) 来支持。
-
-具体来说，使用 TensorFlow 的 Trial，通过 TensorFlow 变量来作为信号，并使用 TensorFlow 的条件函数来控制搜索空间（全图）来提高灵活性。这意味着根据这些信号，可以变为不同的多个子图。 [这里](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/enas_mode)是 enas_mode 的示例。
-
-<a name="OneshotMode"></a>
-
-### oneshot_mode
-
-下图展示了 Dropout 通过 `nni.mutable_layers` 添加在全图的位置，输入的是 1-k 个候选输入，4 个操作是候选的操作。
-
-![](../../img/oneshot_mode.png)
-
-如[论文](http://proceedings.mlr.press/v80/bender18a/bender18a.pdf)中的建议，应该为每层的输入实现 Dropout 方法。 当 0 < r < 1 是模型超参的取值范围（默认值为 0.01），k 是某层可选超参的数量，Dropout 比率设为 r^(1/k)。 fan-in 越高，每个输入被丢弃的可能性越大。 但某层丢弃所有可选输入的概率是常数，与 fan-in 无关。 假设 r = 0.05。 如果某层有 k = 2 个可选的输入，每个输入都会以独立的 0.051/2 ≈ 0.22 的概率被丢弃，也就是说有 0.78 的概率被保留。 如果某层有 k = 7 个可选的输入，每个输入都会以独立的 0.051/7 ≈ 0.65 的概率被丢弃，也就是说有 0.35 的概率被保留。 在这两种情况下，丢弃所有可选输入的概率是 5%。 候选操作的输出会通过同样的方法被丢弃。 [这里](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/oneshot_mode)是 oneshot_mode 的示例。
-
-<a name="DartsMode"></a>
-
-### darts_mode
-
-下图显示了通过 `nni.mutable_layers` 在全图中为某层加入架构权重，每个候选操作的输出会乘以架构权重。
-
-![](../../img/darts_mode.png)
-
-在 `nni.training_update` 中，TensorFlow 的 MomentumOptimizer 通过传递的 `loss` 和 `feed_dict` 来训练架构权重。 [这里](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas/darts_mode)是 darts_mode 的示例。
-
-### [**待实现**] One-Shot NAS 的多 Trial 任务。
-
-One-Shot NAS 通常只有一个带有完整图的 Trial 任务。 但是，同时运行多个 Trial 任务会很有用。 例如，在 enas_mode 中，多个 Trial 任务可以共享全图的权重来加速模型训练或收敛。 一些 One-Shot 不够稳定，运行多个 Trial 任务可以提升找到更好模型的概率。
-
-NNI 原生支持运行多个 Trial 任务。 下图显示了 NNI 上如何运行多个 Trial 任务。
-
-![](../../img/one-shot_training.png)
-
-=============================================================
-
-## NNI 上 NAS 的系统设计
-
-### Experiment 执行的基本流程
-
-NNI 的 Annotation 编译器会将 Trial 代码转换为可以接收架构选择并构建相应模型（如图）的代码。 NAS 的搜索空间可以看作是一个完整的图（在这里，完整的图意味着允许所有提供的操作符和连接来构建图），调优算法所选择的是其子图。 默认情况下，编译时 Trial 代码仅构建并执行子图。
-
-![](../../img/nas_on_nni.png)
-
-上图显示了 Trial 代码如何在 NNI 上运行。 `nnictl` 处理 Trial 代码，并生成搜索空间文件和编译后的 Trial 代码。 前者会输入 Tuner，后者会在 Trial 代码运行时使用。
-
-[使用 NAS 的简单示例](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-nas)。
-
-### [**待实现**] 权重共享
-
-在所选择的架构（即 Trial）之间共享权重可以加速模型搜索。 例如，适当地继承已完成 Trial 的权重可加速新 Trial 的收敛。 One-shot NAS（例如，ENAS，Darts）更为激进，不同架构（即子图）的训练会在完整图中共享相同的权重。
-
-![](../../img/nas_weight_share.png)
-
-权重分配（转移）在加速 NAS 中有关键作用，而找到有效的权重共享方式仍是热门的研究课题。 NNI 提供了一个键值存储，用于存储和加载权重。 Tuner 和 Trial 使用 KV 客户端库来访问存储。
-
-NNI 上的权重共享示例。
-
-## 通用的 NAS 调优算法
-
-与超参数调优一样，NAS 也需要相对通用的算法。 通用编程接口使其更容易。 这是 NAS 上[基于 PPO 算法的 RL Tuner](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/ppo_tuner)。 期待社区努力设计和实施更好的 NAS 调优算法。
-
-## [**待实现**] 导出最佳神经网络架构和代码
-
-Experiment 完成后，可通过 `nnictl experiment export --code` 来导出用最好的神经网络结构和 Trial 代码。
-
-## 结论和未来的工作
-
-如本文所示，不同的 NAS 算法和执行模式，可通过相同的编程接口来支持。
-
-在这一领域有许多系统和机器学习方向的有趣的研究主题。
\ No newline at end of file
diff --git a/docs/zh_CN/Assessor/BuiltinAssessor.md b/docs/zh_CN/Assessor/BuiltinAssessor.md
index 3a12f47006..93f72d251e 100644
--- a/docs/zh_CN/Assessor/BuiltinAssessor.md
+++ b/docs/zh_CN/Assessor/BuiltinAssessor.md
@@ -13,9 +13,9 @@ NNI 提供了先进的调优算法，使用上也很简单。 下面是内置 As
 
 ## 用法
 
-要使用 NNI 内置的 Assessor，需要在 `config.yml` 文件中添加 **builtinAssessorName** 和 **classArgs**。 这一节会介绍推荐的场景、参数等详细用法以及样例。
+要使用 NNI 内置的 Assessor，需要在 `config.yml` 文件中添加 **builtinAssessorName** 和 **classArgs**。 这一节会介绍推荐的场景、参数等详细用法以及示例。
 
-注意：参考样例中的格式来创建新的 `config.yml` 文件。
+注意：参考示例中的格式来创建新的 `config.yml` 文件。
 
 <a name="MedianStop"></a>
 
@@ -32,7 +32,7 @@ NNI 提供了先进的调优算法，使用上也很简单。 下面是内置 As
 * **optimize_mode** (*maximize 或 minimize, 可选, 默认值为 maximize*) - 如果为 'maximize', Assessor 会在结果小于期望值时**终止** Trial。 如果为 'minimize'，Assessor 会在结果大于期望值时**终止** Trial。
 * **start_step** (*int, 可选, 默认值为 0*) - 只有收到 start_step 个中间结果后，才开始判断是否一个 Trial 应该被终止。
 
-**使用样例：**
+**使用示例：**
 
 ```yaml
 # config.yml
@@ -63,7 +63,7 @@ assessor:
 * **threshold** (*float, 可选, 默认值为 0.95*) - 用来确定提前终止较差结果的阈值。 例如，如果 threshold = 0.95, optimize_mode = maximize，最好的历史结果是 0.9，那么会在 Trial 的预测值低于 0.95 * 0.9 = 0.855 时停止。
 * **gap** (*int, 可选, 默认值为 1*) - Assessor 两次评估之间的间隔次数。 例如：如果 gap = 2, start_step = 6，就会评估第 6, 8, 10, 12... 个中间结果。
 
-**使用样例：**
+**使用示例：**
 
 ```yaml
 # config.yml
diff --git a/docs/zh_CN/Assessor/CustomizeAssessor.md b/docs/zh_CN/Assessor/CustomizeAssessor.md
index 27da5f374c..4679bdd12b 100644
--- a/docs/zh_CN/Assessor/CustomizeAssessor.md
+++ b/docs/zh_CN/Assessor/CustomizeAssessor.md
@@ -54,7 +54,9 @@ assessor:
 
 注意在 **2** 中， `trial_history` 对象与 Trial 通过 `report_intermediate_result` 函数返回给 Assessor 的对象完全一致。
 
-更多样例，可参考：
+Assessor 的工作目录是 `<home>/nni/experiments/<experiment_id>/log`，可从环境变量 `NNI_LOG_DIRECTORY` 中获取。
+
+更多示例，可参考：
 
 > * [medianstop-assessor](https://github.com/Microsoft/nni/tree/master/src/sdk/pynni/nni/medianstop_assessor)
 > * [curvefitting-assessor](https://github.com/Microsoft/nni/tree/master/src/sdk/pynni/nni/curvefitting_assessor)
\ No newline at end of file
diff --git a/docs/zh_CN/Compressor/Overview.md b/docs/zh_CN/Compressor/Overview.md
index cd56012f2d..ae8722d5f6 100644
--- a/docs/zh_CN/Compressor/Overview.md
+++ b/docs/zh_CN/Compressor/Overview.md
@@ -1,8 +1,11 @@
-# Compressor
+# 使用 NNI 进行模型压缩
+随着具有更多层和节点的大型神经网络的使用，降低其存储和计算成本变得至关重要，尤其是对于某些实时应用程序。 模型压缩可用于解决此问题。
 
-我们很高兴的宣布，基于 NNI 的模型压缩工具发布了 Alpha 版本。该版本仍处于试验阶段，根据用户反馈会进行改进。 诚挚邀请您使用、反馈，或更多贡献。
+我们很高兴地宣布，基于 NNI 的模型压缩工具发布了试用版本。该版本仍处于试验阶段，会根据用户反馈进行改进。 诚挚邀请您使用、反馈，或做出更多贡献。
 
-NNI 提供了易于使用的工具包来帮助用户设计并使用压缩算法。 其使用了统一的接口来支持 TensorFlow 和 PyTorch。 只需要添加几行代码即可压缩模型。 NNI 中也内置了一些流程的模型压缩算法。 用户还可以通过 NNI 强大的自动调参功能来找到最好的压缩后的模型，详见[自动模型压缩](./AutoCompression.md)。 另外，用户还能使用 NNI 的接口，轻松定制新的压缩算法，详见[教程](#customize-new-compression-algorithms)。
+NNI 提供了易于使用的工具包来帮助用户设计并使用压缩算法。 当前支持基于 PyTorch 的统一接口。 只需要添加几行代码即可压缩模型。 NNI 中也内置了一些流行的模型压缩算法。 用户还可以通过 NNI 强大的自动调参功能来找到最好的压缩后的模型，详见[自动模型压缩](./AutoCompression.md)。 另外，用户还能使用 NNI 的接口，轻松定制新的压缩算法，详见[教程](#customize-new-compression-algorithms)。
+
+模型压缩方面的综述可参考：[Recent Advances in Efficient Computation of Deep Convolutional Neural Networks](https://arxiv.org/pdf/1802.00939.pdf)。
 
 ## 支持的算法
 
@@ -10,22 +13,31 @@ NNI 提供了几种压缩算法，包括剪枝和量化算法：
 
 **剪枝**
 
-| 名称                                              | 算法简介                                                                                                                                   |
-| ----------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------- |
-| [Level Pruner](./Pruner.md#level-pruner)        | 根据权重的绝对值，来按比例修剪权重。                                                                                                                     |
-| [AGP Pruner](./Pruner.md#agp-pruner)            | 自动的逐步剪枝（是否剪枝的判断：基于对模型剪枝的效果）[参考论文](https://arxiv.org/abs/1710.01878)                                                                    |
-| [L1Filter Pruner](./Pruner.md#l1filter-pruner)  | 剪除卷积层中最不重要的过滤器 (PRUNING FILTERS FOR EFFICIENT CONVNETS)[参考论文](https://arxiv.org/abs/1608.08710)                                        |
-| [Slim Pruner](./Pruner.md#slim-pruner)          | 通过修剪 BN 层中的缩放因子来修剪卷积层中的通道 (Learning Efficient Convolutional Networks through Network Slimming)[参考论文](https://arxiv.org/abs/1708.06519) |
-| [Lottery Ticket Pruner](./Pruner.md#agp-pruner) | "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" 提出的剪枝过程。 它会反复修剪模型。 [参考论文](https://arxiv.org/abs/1803.03635) |
-| [FPGM Pruner](./Pruner.md#fpgm-pruner)          | Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration [参考论文](https://arxiv.org/pdf/1811.00250.pdf)   |
+剪枝算法通过删除冗余权重或层通道来压缩原始网络，从而降低模型复杂性并解决过拟合问题。
+
+| 名称                                                                           | 算法简介                                                                                                                                    |
+| ---------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
+| [Level Pruner](./Pruner.md#level-pruner)                                     | 根据权重的绝对值，来按比例修剪权重。                                                                                                                      |
+| [AGP Pruner](./Pruner.md#agp-pruner)                                         | 自动的逐步剪枝（是否剪枝的判断：基于对模型剪枝的效果）[参考论文](https://arxiv.org/abs/1710.01878)                                                                     |
+| [Lottery Ticket Pruner](./Pruner.md#lottery-ticket-hypothesis)               | "The Lottery Ticket Hypothesis: Finding Sparse, Trainable Neural Networks" 提出的剪枝过程。 它会反复修剪模型。 [参考论文](https://arxiv.org/abs/1803.03635)  |
+| [FPGM Pruner](./Pruner.md#fpgm-pruner)                                       | Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration [参考论文](https://arxiv.org/pdf/1811.00250.pdf)    |
+| [L1Filter Pruner](./Pruner.md#l1filter-pruner)                               | 修剪卷积层中权重 L1 范数最小的过滤器 (Pruning Filters for Efficient ConvNets) [参考论文](https://arxiv.org/abs/1608.08710) |
+| [L2Filter Pruner](./Pruner.md#l2filter-pruner)                               | 修剪卷积层中权重 L2 范数最小的过滤器 |
+| [ActivationAPoZRankFilterPruner](./Pruner.md#ActivationAPoZRankFilterPruner) | 基于 APoZ（零的平均百分比）指标修剪过滤器，该指标衡量（卷积）层激活值中零的百分比。 [参考论文](https://arxiv.org/abs/1607.03250) |
+| [ActivationMeanRankFilterPruner](./Pruner.md#ActivationMeanRankFilterPruner) | 基于输出激活的平均值指标，修剪平均值最小的过滤器 |
+| [Slim Pruner](./Pruner.md#slim-pruner)                                       | 通过修剪 BN 层中的缩放因子来修剪卷积层中的通道 (Learning Efficient Convolutional Networks through Network Slimming) [参考论文](https://arxiv.org/abs/1708.06519) |
+
 
 **量化**
 
+量化算法通过减少表示权重或激活所需的精度位数来压缩原始网络，这可以减少计算和推理时间。
+
 | 名称                                                  | 算法简介                                                                                                                                                                       |
 | --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | [Naive Quantizer](./Quantizer.md#naive-quantizer)   | 默认将权重量化为 8 位                                                                                                                                                               |
 | [QAT Quantizer](./Quantizer.md#qat-quantizer)       | 为 Efficient Integer-Arithmetic-Only Inference 量化并训练神经网络。 [参考论文](http://openaccess.thecvf.com/content_cvpr_2018/papers/Jacob_Quantization_and_Training_CVPR_2018_paper.pdf) |
 | [DoReFa Quantizer](./Quantizer.md#dorefa-quantizer) | DoReFa-Net: 通过低位宽的梯度算法来训练低位宽的卷积神经网络。 [参考论文](https://arxiv.org/abs/1606.06160)                                                                                              |
+| [BNN Quantizer](./Quantizer.md#BNN-Quantizer)       | 二进制神经网络：使用权重和激活限制为 +1 或 -1 的深度神经网络。 [参考论文](https://arxiv.org/abs/1602.02830)                                                                                               |
 
 ## 内置压缩算法的用法
 
@@ -57,17 +69,46 @@ pruner.compress()
 实例化压缩算法时，会传入 `config_list`。 配置说明如下。
 
 ### 压缩算法中的用户配置
+压缩模型时，用户可能希望指定稀疏率，为不同类型的操作指定不同的比例，排除某些类型的操作，或仅压缩某类操作。 配置规范可用于表达此类需求。 可将其视为一个 Python 的 `list` 对象，其中每个元素都是一个 `dict` 对象。
 
-压缩模型时，用户可能希望指定稀疏率，为不同类型的操作指定不同的比例，排除某些类型的操作，或仅压缩某类操作。 配置规范可用于表达此类需求。 可将其视为一个 Python 的 `list` 对象，其中每个元素都是一个 `dict` 对象。 在每个 `dict` 中，有一些 NNI 压缩算法支持的键值：
+`list` 中的 `dict` 会依次被应用，也就是说，如果一个操作出现在两个配置里，后面的 `dict` 会覆盖前面的配置。
+
+#### 通用键值
+在每个 `dict` 中，有一些 NNI 压缩算法支持的键值：
 
 * __op_types__：指定要压缩的操作类型。 'default' 表示使用算法的默认设置。
 * __op_names__：指定需要压缩的操作的名称。 如果没有设置此字段，操作符不会通过名称筛选。
 * __exclude__：默认为 False。 如果此字段为 True，表示要通过类型和名称，将一些操作从压缩中排除。
 
-`dict` 还有一些其它键值，由特定的压缩算法所使用。 例如：
+#### 量化算法的键值
+**如果使用量化算法，则需要设置更多键值。 如果使用剪枝算法，则可以忽略这些键值**
 
-`list` 中的 `dict` 会依次被应用，也就是说，如果一个操作出现在两个配置里，后面的 `dict` 会覆盖前面的配置。
+* __quant_types__ : 字符串列表。
+
+要应用量化的类型，当前支持 "weight"、"input"、"output"。 "weight" 是指将量化操作应用到 module 的权重参数上。 "input" 是指对 module 的 forward 方法的输入应用量化操作。 "output" 是指将量化操作应用于 module 的 forward 方法的输出，在某些论文中，这种方法称为“激活”。
 
+* __quant_bits__ : int 或 dict {str : int}
+
+量化的位宽，键是量化类型，值是量化位宽度，例如：
+```
+{
+    quant_bits: {
+        'weight': 8,
+        'output': 4,
+        },
+}
+```
+当值为 int 类型时，所有量化类型使用相同的位宽。 例如：
+```
+{
+    quant_bits: 8, # 权重和输出的位宽都为 8 bits
+}
+```
+#### 为每个压缩算法指定的其他键
+`dict` 还有一些其它键值，由特定的压缩算法所使用。 例如， [Level Pruner](./Pruner.md#level-pruner) 需要 `sparsity` 键，用于指定修剪的量。
+
+
+#### 示例
 配置的简单示例如下：
 
 ```python
@@ -178,11 +219,9 @@ class YourPruner(nni.compression.tensorflow.Pruner):
 定制量化算法的接口与剪枝算法类似。 唯一的不同是使用 `quantize_weight` 替换了 `calc_mask`。 `quantize_weight` 直接返回量化后的权重，而不是 mask。这是因为对于量化算法，量化后的权重不能通过应用 mask 来获得。
 
 ```python
-# TensorFlow 中定制 Quantizer。
-# PyTorch 的 Quantizer，只需将
-# nni.compression.tensorflow.Quantizer 替换为
-# nni.compression.torch.Quantizer
-class YourQuantizer(nni.compression.tensorflow.Quantizer):
+from nni.compression.torch.compressor import Quantizer
+
+class YourQuantizer(Quantizer):
     def __init__(self, model, config_list):
         """
         建议使用 NNI 定义的规范来配置
@@ -231,27 +270,71 @@ class YourQuantizer(nni.compression.tensorflow.Quantizer):
         Parameters
         ----------
         inputs : Tensor
-            需要量化的输入
+            需要被量化的张量
         config : dict
-            输入量化用的配置
+            输入量化的配置
         """
 
-        # 实现生成 `new_input`
+        # 生成 `new_input` 的代码
 
         return new_input
 
-    # Pytorch 版本不需要 sess 参数
-    def update_epoch(self, epoch_num, sess):
+    def update_epoch(self, epoch_num):
         pass
 
-    # Pytorch 版本不需要 sess 参数
-    def step(self, sess):
+    def step(self):
         """
-       根据需要可基于 bind_model 方法中的模型或权重进行操作
+        Can do some processing based on the model or weights bound
+        in the func bind_model
         """
         pass
 ```
+#### 定制 backward 函数
+有时，量化操作必须自定义 backward 函数，例如 [Straight-Through Estimator](https://stackoverflow.com/questions/38361314/the-concept-of-straight-through-estimator-ste)，可如下定制 backward 函数：
+
+```python
+from nni.compression.torch.compressor import Quantizer, QuantGrad, QuantType
+
+class ClipGrad(QuantGrad):
+    @staticmethod
+    def quant_backward(tensor, grad_output, quant_type):
+        """
+        此方法应被子类重载来提供定制的 backward 函数，
+        默认实现是 Straight-Through Estimator
+        Parameters
+        ----------
+        tensor : Tensor
+            量化操作的输入
+        grad_output : Tensor
+            量化操作输出的梯度
+        quant_type : QuantType
+            量化类型，可为 `QuantType.QUANT_INPUT`, `QuantType.QUANT_WEIGHT`, `QuantType.QUANT_OUTPUT`,
+            可为不同的类型定义不同的行为。
+        Returns
+        -------
+        tensor
+            量化输入的梯度
+        """
+
+        # 对于 quant_output 函数，如果张量的绝对值大于 1，则将梯度设置为 0
+        if quant_type == QuantType.QUANT_OUTPUT: 
+            grad_output[torch.abs(tensor) > 1] = 0
+        return grad_output
+
+
+class YourQuantizer(Quantizer):
+    def __init__(self, model, config_list):
+        super().__init__(model, config_list)
+        # 定制 backward 函数来重载默认的 backward 函数
+        self.quant_grad = ClipGrad
+
+```
 
-### 使用用户自定义的压缩算法
+如果不定制 `QuantGrad`，默认的 backward 为 Straight-Through Estimator。 _即将推出_...
 
-__[TODO]__ ...
+## **参考和反馈**
+* 在 GitHub 中[提交此功能的 Bug](https://github.com/microsoft/nni/issues/new?template=bug-report.md)；
+* 在 GitHub 中[提交新功能或改进请求](https://github.com/microsoft/nni/issues/new?template=enhancement.md)；
+* 了解 NNI 中[特征工程的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/FeatureEngineering/Overview.md)；
+* 了解 NNI 中[ NAS 的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/NAS/Overview.md)；
+* 了解如何[使用 NNI 进行超参数调优](https://github.com/microsoft/nni/blob/master/docs/zh_CN/Tuner/BuiltinTuner.md)；
diff --git a/docs/zh_CN/Compressor/Pruner.md b/docs/zh_CN/Compressor/Pruner.md
index ab57596a43..0e7963c9d8 100644
--- a/docs/zh_CN/Compressor/Pruner.md
+++ b/docs/zh_CN/Compressor/Pruner.md
@@ -1,6 +1,19 @@
 NNI Compressor 中的 Pruner
 ===
 
+支持的剪枝算法
+* [Level Pruner](#level-pruner)
+* [AGP Pruner](#agp-pruner)
+* [Lottery Ticket 假设](#lottery-ticket-hypothesis)
+* [Slim Pruner](#slim-pruner)
+* [具有权重等级的 Filter Pruners](#weightrankfilterpruner)
+    * [FPGM Pruner](#fpgm-pruner)
+    * [L1Filter Pruner](#l1filter-pruner)
+    * [L2Filter Pruner](#l2filter-pruner)
+* [具有激活等级的 Filter Pruners](#activationrankfilterpruner)
+    * [APoZ Rank Pruner](#activationapozrankfilterpruner)
+    * [Activation Mean Rank Pruner](#activationmeanrankfilterpruner)
+
 ## Level Pruner
 
 这是个基本的一次性 Pruner：可设置目标稀疏度（以分数表示，0.6 表示会剪除 60%）。
@@ -10,7 +23,7 @@ NNI Compressor 中的 Pruner
 ### 用法
 
 TensorFlow 代码
-```
+```python
 from nni.compression.tensorflow import LevelPruner
 config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
 pruner = LevelPruner(model_graph, config_list)
@@ -18,7 +31,7 @@ pruner.compress()
 ```
 
 PyTorch 代码
-```
+```python
 from nni.compression.torch import LevelPruner
 config_list = [{ 'sparsity': 0.8, 'op_types': ['default'] }]
 pruner = LevelPruner(model, config_list)
@@ -37,8 +50,6 @@ pruner.compress()
 ### 用法
 通过下列代码，可以在 10 个 Epoch 中将权重稀疏度从 0% 剪枝到 80%。
 
-首先，导入 Pruner 来为模型添加遮盖。
-
 TensorFlow 代码
 ```python
 from nni.compression.tensorflow import AGP_Pruner
@@ -68,7 +79,7 @@ pruner = AGP_Pruner(model, config_list)
 pruner.compress()
 ```
 
-其次，在训练代码中每完成一个 Epoch，更新一下 Epoch 数值。
+在训练代码中每完成一个 Epoch，更新一下 Epoch 数值。
 
 TensorFlow 代码
 ```python
@@ -130,12 +141,45 @@ for _ in pruner.get_prune_iterations():
 * **sparsity:** 压缩完成后的最终稀疏度。
 
 ***
-## FPGM Pruner
+
+## Slim Pruner
+
+这是一次性的 Pruner，在 ['Learning Efficient Convolutional Networks through Network Slimming'](https://arxiv.org/pdf/1708.06519.pdf) 中提出，作者 Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan 以及 Changshui Zhang。
+
+![](../../img/slim_pruner.png)
+
+> Slim Pruner **会遮盖卷积层通道之后 BN 层对应的缩放因子**，训练时在缩放因子上的 L1 正则化应在批量正规化 (BN) 层之后来做。BN 层的缩放因子在修剪时，是**全局排序的**，因此稀疏模型能自动找到给定的稀疏度。
+
+### 用法
+
+PyTorch 代码
+
+```python
+from nni.compression.torch import SlimPruner
+config_list = [{ 'sparsity': 0.8, 'op_types': ['BatchNorm2d'] }]
+pruner = SlimPruner(model, config_list)
+pruner.compress()
+```
+
+#### Slim Pruner 的用户配置
+
+- **sparsity:**，指定压缩的稀疏度。
+- **op_types:** 在 Slim Pruner 中仅支持 BatchNorm2d。
+
+
+## WeightRankFilterPruner
+WeightRankFilterPruner 是一系列的 Pruner，在卷积层权重上，用最小的重要性标准修剪过滤器，来达到预设的网络稀疏度。
+
+### FPGM Pruner
+
 这是一种一次性的 Pruner，FPGM Pruner 是论文 [Filter Pruning via Geometric Median for Deep Convolutional Neural Networks Acceleration](https://arxiv.org/pdf/1811.00250.pdf) 的实现
+
+FPGMPruner 修剪具有最小几何中位数的过滤器
+
+ ![](../../img/fpgm_fig1.png)
 > 以前的方法使用 “smaller-norm-less-important” 准则来修剪卷积神经网络中规范值较小的。 本文中，分析了基于规范的准则，并指出其所依赖的两个条件不能总是满足：(1) 过滤器的规范偏差应该较大；(2) 过滤器的最小规范化值应该很小。 为了解决此问题，提出了新的过滤器修建方法，即 Filter Pruning via Geometric Median (FPGM)，可不考虑这两个要求来压缩模型。 与以前的方法不同，FPGM 通过修剪冗余的，而不是相关性更小的部分来压缩 CNN 模型。
 
-### 用法
-首先，导入 Pruner 来为模型添加遮盖。
+#### 用法
 
 TensorFlow 代码
 ```python
@@ -159,7 +203,7 @@ pruner.compress()
 ```
 注意：FPGM Pruner 用于修剪深度神经网络中的卷积层，因此 `op_types` 字段仅支持卷积层。
 
-另外，需要在每个 epoch 开始的地方添加下列代码来更新 epoch 的编号。
+需要在每个 epoch 开始的地方添加下列代码来更新 epoch 的编号。
 
 TensorFlow 代码
 ```python
@@ -176,9 +220,9 @@ pruner.update_epoch(epoch)
 
 ***
 
-## L1Filter Pruner
+### L1Filter Pruner
 
-这是一种一次性的 Pruner，由 ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710) 提出，作者 Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet 和 Hans Peter Graf。
+这是一种一次性的 Pruner，由 ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710) 提出，作者 Hao Li, Asim Kadav, Igor Durdanovic, Hanan Samet 和 Hans Peter Graf。 [重现的实验结果](l1filterpruner.md)
 
 ![](../../img/l1filter_pruner.png)
 
@@ -191,7 +235,11 @@ pruner.update_epoch(epoch)
 > 3. 修剪 ![](http://latex.codecogs.com/gif.latex?m) 具有最小求和值及其相应特征图的筛选器。 在 下一个卷积层中，被剪除的特征图所对应的内核也被移除。
 > 4. 为第 ![](http://latex.codecogs.com/gif.latex?i) 和 ![](http://latex.codecogs.com/gif.latex?i+1) 层创建新的内核举证，并保留剩余的内核 权重，并复制到新模型中。
 
-```
+#### 用法
+
+PyTorch 代码
+
+```python
 from nni.compression.torch import L1FilterPruner
 config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
 pruner = L1FilterPruner(model, config_list)
@@ -201,28 +249,91 @@ pruner.compress()
 #### L1Filter Pruner 的用户配置
 
 - **sparsity:**，指定压缩的稀疏度。
-- **op_types:** 在 L1Filter Pruner 中仅支持 Conv2d。
+- **op_types:** 在 L1Filter Pruner 中仅支持 Conv1d 和 Conv2d。
 
-## Slim Pruner
+***
 
-这是一次性的 Pruner，在 ['Learning Efficient Convolutional Networks through Network Slimming'](https://arxiv.org/pdf/1708.06519.pdf) 中提出，作者 Zhuang Liu, Jianguo Li, Zhiqiang Shen, Gao Huang, Shoumeng Yan 以及 Changshui Zhang。
+### L2Filter Pruner
 
-![](../../img/slim_pruner.png)
+这是一种结构化剪枝算法，用于修剪权重 L2 范数最小的过滤器。 它被实现为一次性修剪器。
 
-> Slim Pruner **会遮盖卷据层通道之后 BN 层对应的缩放因子**，训练时在缩放因子上的 L1 正规化应在批量正规化 (BN) 层之后来做。BN 层的缩放因子在修剪时，是**全局排序的**，因此稀疏模型能自动找到给定的稀疏度。
+#### 用法
 
-### 用法
+PyTorch 代码
+
+```python
+from nni.compression.torch import L2FilterPruner
+config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'] }]
+pruner = L2FilterPruner(model, config_list)
+pruner.compress()
+```
+
+#### L2Filter Pruner 的用户配置
+
+- **sparsity:**，指定压缩的稀疏度。
+- **op_types:** 在 L2Filter Pruner 中仅支持 Conv1d 和 Conv2d。
+
+## ActivationRankFilterPruner
+ActivationRankFilterPruner 是一系列的 Pruner，从卷积层激活的输出，用最小的重要性标准修剪过滤器，来达到预设的网络稀疏度。
+
+### ActivationAPoZRankFilterPruner
+
+我们将其实现为一次性剪枝器，它基于 `APoZ` 修剪卷积层，参考论文 [Network Trimming: A Data-Driven Neuron Pruning Approach towards Efficient Deep Architectures](https://arxiv.org/abs/1607.03250)。 基于迭代剪枝的 `APoZ` 将在以后的版本中支持。
+
+APoZ 定义为：
+
+![](../../img/apoz.png)
+
+#### 用法
 
 PyTorch 代码
 
+```python
+from nni.compression.torch import ActivationAPoZRankFilterPruner
+config_list = [{
+    'sparsity': 0.5,
+    'op_types': ['Conv2d']
+}]
+pruner = ActivationAPoZRankFilterPruner(model, config_list, statistics_batch_num=1)
+pruner.compress()
 ```
-from nni.compression.torch import SlimPruner
-config_list = [{ 'sparsity': 0.8, 'op_types': ['BatchNorm2d'] }]
-pruner = SlimPruner(model, config_list)
+
+注意：ActivationAPoZRankFilterPruner 用于修剪深度神经网络中的卷积层，因此 `op_types` 字段仅支持卷积层。
+
+查看示例进一步了解
+
+#### ActivationAPoZRankFilterPruner 的用户配置
+
+- **sparsity:** 卷积过滤器要修剪的百分比。
+- **op_types:** 在 ActivationAPoZRankFilterPruner 中仅支持 Conv2d。
+
+***
+
+### ActivationMeanRankFilterPruner
+
+其实现为一次性修剪器，基于 `平均激活` 准则来修剪卷积层，在论文 [Pruning Convolutional Neural Networks for Resource Efficient Inference](https://arxiv.org/abs/1611.06440) 的 2.2 节中有说明。 本文中提到的其他修剪标准将在以后的版本中支持。
+
+#### 用法
+
+PyTorch 代码
+
+```python
+from nni.compression.torch import ActivationMeanRankFilterPruner
+config_list = [{
+    'sparsity': 0.5,
+    'op_types': ['Conv2d']
+}]
+pruner = ActivationMeanRankFilterPruner(model, config_list)
 pruner.compress()
 ```
 
-#### Slim Pruner 的用户配置
+注意：ActivationMeanRankFilterPruner 用于修剪深度神经网络中的卷积层，因此 `op_types` 字段仅支持卷积层。
 
-- **sparsity:**，指定压缩的稀疏度。
-- **op_types:** 在 Slim Pruner 中仅支持 BatchNorm2d。
+查看示例进一步了解
+
+#### ActivationMeanRankFilterPruner 的用户配置
+
+- **sparsity:** 卷积过滤器要修剪的百分比。
+- **op_types:** 在 ActivationMeanRankFilterPruner 中仅支持 Conv2d。
+
+***
\ No newline at end of file
diff --git a/docs/zh_CN/Compressor/Quantizer.md b/docs/zh_CN/Compressor/Quantizer.md
index 6588963551..d2a571f874 100644
--- a/docs/zh_CN/Compressor/Quantizer.md
+++ b/docs/zh_CN/Compressor/Quantizer.md
@@ -1,18 +1,14 @@
 NNI Compressor 中的 Quantizer
 ===
-
 ## Naive Quantizer
 
 Naive Quantizer 将 Quantizer 权重默认设置为 8 位，可用它来测试量化算法。
 
 ### 用法
-Tensorflow
-```python
-nni.compressors.tensorflow.NaiveQuantizer(model_graph).compress()
+TensorFlow：`nni.compression.tensorflow.NaiveQuantizer(model_graph).compress()`
 ```
-PyTorch
-```python
-nni.compressors.torch.NaiveQuantizer(model).compress()
+# PyTorch
+nni.compression.torch.NaiveQuantizer(model).compress()
 ```
 
 ***
@@ -27,7 +23,7 @@ nni.compressors.torch.NaiveQuantizer(model).compress()
 
 PyTorch 代码
 ```python
-from nni.compressors.torch import QAT_Quantizer
+from nni.compression.torch import QAT_Quantizer
 model = Mnist()
 
 config_list = [{
@@ -49,9 +45,13 @@ quantizer.compress()
 查看示例进一步了解
 
 #### QAT Quantizer 的用户配置
-* **quant_types:**: 字符串列表 要应用的量化类型，当前支持 'weight', 'input', 'output'
-* **quant_bits:** int 或 {str : int} 的 dict 量化的位长，主键是量化类型，键值为长度，例如。 {'weight', 8}, 当类型为 int 时，所有量化类型都用同样的位长
-* **quant_start_step:** int 在运行到某步骤前，对模型禁用量化。这让网络在进入更稳定的 状态后再激活量化，这样不会配除掉一些分数显著的值，默认为 0
+压缩算法所需的常见配置可在[通用配置](./Overview.md#User-configuration-for-a-compression-algorithm)中找到。
+
+此算法所需的配置：
+
+* **quant_start_step:** int
+
+在模型运行到指定步数之前禁用量化。这样可以让网络先进入更稳定的状态，使激活值的量化范围不会排除掉很大一部分数值。默认为 0。
 
 ### 注意
 当前不支持批处理规范化折叠。
@@ -63,17 +63,14 @@ quantizer.compress()
 ### 用法
 要实现 DoReFa Quantizer，在训练代码前加入以下代码。
 
-TensorFlow 代码
-```python
-from nni.compressors.tensorflow import DoReFaQuantizer
-config_list = [{ 'q_bits': 8, 'op_types': 'default' }]
-quantizer = DoReFaQuantizer(tf.get_default_graph(), config_list)
-quantizer.compress()
-```
 PyTorch 代码
 ```python
-from nni.compressors.torch import DoReFaQuantizer
-config_list = [{ 'q_bits': 8, 'op_types': 'default' }]
+from nni.compression.torch import DoReFaQuantizer
+config_list = [{ 
+    'quant_types': ['weight'],
+    'quant_bits': 8, 
+    'op_types': 'default' 
+}]
 quantizer = DoReFaQuantizer(model, config_list)
 quantizer.compress()
 ```
@@ -81,4 +78,52 @@ quantizer.compress()
 查看示例进一步了解
 
 #### DoReFa Quantizer 的用户配置
-* **q_bits:** 指定需要被量化的位数。
+压缩算法所需的常见配置可在[通用配置](./Overview.md#User-configuration-for-a-compression-algorithm)中找到。
+
+此算法所需的配置：
+
+
+## BNN Quantizer
+在 [Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1](https://arxiv.org/abs/1602.02830) 中，
+> 引入了一种训练二进制神经网络（BNN）的方法 - 即在运行时使用二进制权重和激活的神经网络。 在训练时，二进制权重和激活用于计算参数梯度。 在 forward 过程中，BNN 会大大减少内存大小和访问，并将大多数算术运算替换为按位计算，可显著提高能源效率。
+
+
+### 用法
+
+PyTorch 代码
+```python
+from nni.compression.torch import BNNQuantizer
+model = VGG_Cifar10(num_classes=10)
+
+configure_list = [{
+    'quant_bits': 1,
+    'quant_types': ['weight'],
+    'op_types': ['Conv2d', 'Linear'],
+    'op_names': ['features.0', 'features.3', 'features.7', 'features.10', 'features.14', 'features.17', 'classifier.0', 'classifier.3']
+}, {
+    'quant_bits': 1,
+    'quant_types': ['output'],
+    'op_types': ['Hardtanh'],
+    'op_names': ['features.6', 'features.9', 'features.13', 'features.16', 'features.20', 'classifier.2', 'classifier.5']
+}]
+
+quantizer = BNNQuantizer(model, configure_list)
+model = quantizer.compress()
+```
+
+可以查看示例 [examples/model_compress/BNN_quantizer_cifar10.py](https://github.com/microsoft/nni/tree/master/examples/model_compress/BNN_quantizer_cifar10.py) 了解更多信息。
+
+#### BNN Quantizer 的用户配置
+压缩算法所需的常见配置可在[通用配置](./Overview.md#User-configuration-for-a-compression-algorithm)中找到。
+
+此算法所需的配置：
+
+### 实验
+我们实现了 [Binarized Neural Networks: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1](https://arxiv.org/abs/1602.02830) 中的一个实验，对 CIFAR-10 上的 **VGGNet** 进行了量化操作。 我们的实验结果如下：
+
+| 模型     | 精度     |
+| ------ | ------ |
+| VGGNet | 86.93% |
+
+
+实验代码可在 [examples/model_compress/BNN_quantizer_cifar10.py](https://github.com/microsoft/nni/tree/master/examples/model_compress/BNN_quantizer_cifar10.py) 中找到。
\ No newline at end of file
diff --git a/docs/zh_CN/Compressor/L1FilterPruner.md b/docs/zh_CN/Compressor/l1filterpruner.md
similarity index 66%
rename from docs/zh_CN/Compressor/L1FilterPruner.md
rename to docs/zh_CN/Compressor/l1filterpruner.md
index 49c3e50d62..a460854e56 100644
--- a/docs/zh_CN/Compressor/L1FilterPruner.md
+++ b/docs/zh_CN/Compressor/l1filterpruner.md
@@ -1,7 +1,7 @@
-NNI Compressor 中的 L1FilterPruner
+NNI 上的 L1FilterPruner
 ===
 
-## 1. 介绍
+## 介绍
 
 L1FilterPruner 是在卷积层中用来修剪过滤器的通用剪枝算法。
 
@@ -18,25 +18,9 @@ L1FilterPruner 是在卷积层中用来修剪过滤器的通用剪枝算法。
 > 3. 修剪 ![](http://latex.codecogs.com/gif.latex?m) 具有最小求和值及其相应特征图的筛选器。 在 下一个卷积层中，被剪除的特征图所对应的内核也被移除。
 > 4. 为第 ![](http://latex.codecogs.com/gif.latex?i) 和 ![](http://latex.codecogs.com/gif.latex?i+1) 层创建新的内核举证，并保留剩余的内核 权重，并复制到新模型中。
 
-## 2. 用法
+## 实验
 
-PyTorch 代码
-
-```
-from nni.compression.torch import L1FilterPruner
-config_list = [{ 'sparsity': 0.8, 'op_types': ['Conv2d'], 'op_names': ['conv1', 'conv2'] }]
-pruner = L1FilterPruner(model, config_list)
-pruner.compress()
-```
-
-#### L1Filter Pruner 的用户配置
-
-- **sparsity:**，指定压缩的稀疏度。
-- **op_types:** 在 L1Filter Pruner 中仅支持 Conv2d。
-
-## 3. 实验
-
-我们实现了 ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710) 中的一项实验， 即论文中，在 CIFAR-10 数据集上修剪 **VGG-16** 的 **VGG-16-pruned-A**，其中大约剪除了 $64\%$ 的参数。 我们的实验结果如下：
+我们通过 **L1FilterPruner** 实现了 ['PRUNING FILTERS FOR EFFICIENT CONVNETS'](https://arxiv.org/abs/1608.08710) 中的一项实验， 即论文中，在 CIFAR-10 数据集上修剪 **VGG-16** 的 **VGG-16-pruned-A**，其中大约剪除了 $64\%$ 的参数。 我们的实验结果如下：
 
 | 模型              | 错误率(论文/我们的) | 参数量      | 剪除率   |
 | --------------- | ----------- | -------- | ----- |
diff --git a/docs/zh_CN/FeatureEngineering/Overview.md b/docs/zh_CN/FeatureEngineering/Overview.md
index b8c0f41bd2..0ac9e0d399 100644
--- a/docs/zh_CN/FeatureEngineering/Overview.md
+++ b/docs/zh_CN/FeatureEngineering/Overview.md
@@ -1,6 +1,6 @@
-# 特征工程
+# NNI 中的特征工程
 
-我们很高兴的宣布，基于 NNI 的特征工程工具发布了 Alpha 版本。该版本仍处于试验阶段，根据使用反馈会进行改进。 诚挚邀请您使用、反馈，或更多贡献。
+我们很高兴的宣布，基于 NNI 的特征工程工具发布了试用版本。该版本仍处于试验阶段，根据使用反馈会进行改进。 诚挚邀请您使用、反馈，或更多贡献。
 
 当前支持以下特征选择器：
 - [GradientFeatureSelector](./GradientFeatureSelector.md)
@@ -253,4 +253,11 @@ print("Pipeline Score: ", pipeline.score(X_train, y_train))
 
 此基准测试可在[这里](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/)下载
 
-代码参考 `/examples/feature_engineering/gradient_feature_selector/benchmark_test.py`。
+代码参考 `/examples/feature_engineering/gradient_feature_selector/benchmark_test.py`。
+
+## **参考和反馈**
+* 在 GitHub 中[提交此功能的 Bug](https://github.com/microsoft/nni/issues/new?template=bug-report.md)；
+* 在 GitHub 中[提交新功能或改进请求](https://github.com/microsoft/nni/issues/new?template=enhancement.md)；
+* 了解 NNI 中[神经网络结构搜索的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/NAS/Overview.md)；
+* 了解 NNI 中[模型自动压缩的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/Compressor/Overview.md)；
+* 了解如何[使用 NNI 进行超参数调优](https://github.com/microsoft/nni/blob/master/docs/zh_CN/Tuner/BuiltinTuner.md)；
diff --git a/docs/zh_CN/NAS/Overview.md b/docs/zh_CN/NAS/Overview.md
index 509900aba5..1474a4d788 100644
--- a/docs/zh_CN/NAS/Overview.md
+++ b/docs/zh_CN/NAS/Overview.md
@@ -105,3 +105,10 @@ python3 retrain.py --arc-checkpoint ../pdarts/checkpoints/epoch_2.json
 2. 在神经网络上应用 NAS 时，需要统一的方式来表达架构的搜索空间，这样不必为不同的搜索算法来更改代码。
 
 NNI 提出的 API 在[这里](https://github.com/microsoft/nni/tree/master/src/sdk/pynni/nni/nas/pytorch)。 [这里](https://github.com/microsoft/nni/tree/master/examples/nas/darts)包含了基于此 API 的 NAS 实现示例。
+
+## **参考和反馈**
+* 在 GitHub 中[提交此功能的 Bug](https://github.com/microsoft/nni/issues/new?template=bug-report.md)；
+* 在 GitHub 中[提交新功能或改进请求](https://github.com/microsoft/nni/issues/new?template=enhancement.md)；
+* 了解 NNI 中[特征工程的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/FeatureEngineering/Overview.md)；
+* 了解 NNI 中[模型自动压缩的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/Compressor/Overview.md)；
+* 了解如何[使用 NNI 进行超参数调优](https://github.com/microsoft/nni/blob/master/docs/zh_CN/Tuner/BuiltinTuner.md)；
diff --git a/docs/zh_CN/NAS/PDARTS.md b/docs/zh_CN/NAS/PDARTS.md
new file mode 100644
index 0000000000..5c187a26e8
--- /dev/null
+++ b/docs/zh_CN/NAS/PDARTS.md
@@ -0,0 +1,18 @@
+# P-DARTS
+
+## 示例
+
+[示例代码](https://github.com/microsoft/nni/tree/master/examples/nas/pdarts)
+
+```bash
+# 如果未克隆 NNI 代码，请执行下面的命令。 如果代码已被克隆，请忽略此行并直接进入代码目录。
+git clone https://github.com/Microsoft/nni.git
+
+# 搜索最好的架构
+cd examples/nas/pdarts
+python3 search.py
+
+# 训练最好的架构，过程与 darts 相同。
+cd ../darts
+python3 retrain.py --arc-checkpoint ../pdarts/checkpoints/epoch_2.json
+```
diff --git a/docs/zh_CN/NAS/SPOS.md b/docs/zh_CN/NAS/SPOS.md
new file mode 100644
index 0000000000..4251a6a9fe
--- /dev/null
+++ b/docs/zh_CN/NAS/SPOS.md
@@ -0,0 +1,118 @@
+# 单路径 One-Shot (SPOS)
+
+## 介绍
+
+在 [Single Path One-Shot Neural Architecture Search with Uniform Sampling](https://arxiv.org/abs/1904.00420) 中提出的 one-shot NAS 方法，通过构造一个简化的超网络，并使用统一的路径采样方法对其进行训练，来解决 One-Shot 模型训练的问题。这样所有架构（及其权重）都得到了完全且平等的训练。 然后，采用进化算法无需任何微调即可有效的搜索出性能最佳的体系结构。
+
+在 NNI 上的实现基于[官方 Repo](https://github.com/megvii-model/SinglePathOneShot)。 实现了一个训练超网络的 Trainer，以及一个利用 NNI 框架能力来加速进化搜索阶段的进化 Tuner。
+
+## 示例
+
+此示例是论文中的搜索空间，使用 flops 限制来执行统一的采样方法。
+
+[示例代码](https://github.com/microsoft/nni/tree/master/examples/nas/spos)
+
+### 必需组件
+
+由于使用了 DALI 来加速 ImageNet 的数据读取，需要 NVIDIA DALI >= 0.16。 [安装指南](https://docs.nvidia.com/deeplearning/sdk/dali-developer-guide/docs/installation.html)
+
+从[这里](https://1drv.ms/u/s!Am_mmG2-KsrnajesvSdfsq_cN48?e=aHVppN) (由 [Megvii](https://github.com/megvii-model) 维护) 下载 flops 查找表。 将 `op_flops_dict.pkl` 和 `checkpoint-150000.pth.tar` (如果不需要重新训练超网络) 放到 `data` 目录中。
+
+准备标准格式的 ImageNet (参考[这里的脚本](https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4))。 将其链接到 `data/imagenet` 会更方便。
+
+准备好后，应具有以下代码结构：
+
+```
+spos
+├── architecture_final.json
+├── blocks.py
+├── config_search.yml
+├── data
+│   ├── imagenet
+│   │   ├── train
+│   │   └── val
+│   └── op_flops_dict.pkl
+├── dataloader.py
+├── network.py
+├── readme.md
+├── scratch.py
+├── supernet.py
+├── tester.py
+├── tuner.py
+└── utils.py
+```
+
+### 步骤 1. 训练超网络
+
+```
+python supernet.py
+```
+
+会将检查点导出到 `checkpoints` 目录中，为下一步做准备。
+
+注意：官方 Repo 中的数据加载方式[与通常的方法有所不同](https://github.com/megvii-model/SinglePathOneShot/issues/5)，使用了 BGR 张量，以及 0 到 255 之间的值来与自己的深度学习框架对齐。 选项 `--spos-preprocessing` 会模拟原始的使用行为，并能使用预训练的检查点。
+
+### 步骤 2. 进化搜索
+
+单路径 One-Shot 利用进化算法来搜索最佳架构。 tester 负责通过训练图像的子集来测试采样的体系结构，重新计算所有批归一化（Batch Norm）层的统计量，并在完整的验证集上评估架构。
+
+为了使 Tuner 识别 flops 限制并能计算 flops，在 `tuner.py` 中创建了新的 `EvolutionWithFlops` Tuner，其继承于 SDK 中的 tuner。
+
+要为 NNI 框架准备好搜索空间，首先运行
+
+```
+nnictl ss_gen -t "python tester.py"
+```
+
+将生成 `nni_auto_gen_search_space.json` 文件，这是搜索空间的序列化形式。
+
+默认情况下，它将使用前面下载的 `checkpoint-150000.pth.tar`。 如果要使用自行训练的检查点，在 `config_search.yml` 的命令中指定 `--checkpoint`。
+
+然后使用进化 Tuner 搜索。
+
+```
+nnictl create --config config_search.yml
+```
+
+从每个 Epoch 导出的最终架构可在 Tuner 工作目录下的 `checkpoints` 中找到，默认值为 `$HOME/nni/experiments/your_experiment_id/log`。
+
+### 步骤 3. 从头开始训练
+
+```
+python scratch.py
+```
+
+默认情况下，它将使用 `architecture_final.json`。 该体系结构由官方仓库提供（转换成了 NNI 格式）。 通过 `--fixed-arc` 选项，可使用任何结构（例如，步骤 2 中找到的结构）。
+
+## 参考
+
+### PyTorch
+
+```eval_rst
+..  autoclass:: nni.nas.pytorch.spos.SPOSEvolution
+    :members:
+
+    .. automethod:: __init__
+
+..  autoclass:: nni.nas.pytorch.spos.SPOSSupernetTrainer
+    :members:
+
+    .. automethod:: __init__
+
+..  autoclass:: nni.nas.pytorch.spos.SPOSSupernetTrainingMutator
+    :members:
+
+    .. automethod:: __init__
+```
+
+## 已知的局限
+
+* 仅支持 Block 搜索。 尚不支持通道搜索。
+* 仅提供 GPU 版本。
+
+## 当前重现结果
+
+重现中。 由于官方版本和原始论文之间的不同，我们将当前结果与官方 Repo（我们运行的结果）和论文进行了比较。
+
+* 进化阶段几乎与官方 Repo 一致。 进化算法显示出了收敛趋势，在搜索结束时达到约 65% 的精度。 但此结果与论文不一致。 详情参考[此 issue](https://github.com/megvii-model/SinglePathOneShot/issues/6)。
+* 重新训练阶段未匹配。 我们的重新训练代码，使用了作者发布的架构，获得了 72.14% 的准确率，与官方发布的 73.61%，和原始论文中的 74.3% 有一定差距。
diff --git a/docs/zh_CN/Overview.md b/docs/zh_CN/Overview.md
index 45f717905a..72c3501e5d 100644
--- a/docs/zh_CN/Overview.md
+++ b/docs/zh_CN/Overview.md
@@ -37,7 +37,7 @@ Experiment 的运行过程为：Tuner 接收搜索空间并生成配置。 这
 > 
 > 第二步：[改动模型代码](TrialExample/Trials.md)
 > 
-> 第三步：[>定义 Experiment 配置](Tutorial/ExperimentConfig.md)
+> 第三步：[定义 Experiment 配置](Tutorial/ExperimentConfig.md)
 
 <p align="center">
 <img src="https://user-images.githubusercontent.com/23273522/51816627-5d13db80-2302-11e9-8f3e-627e260203d5.jpg" alt="绘图"/>
diff --git a/docs/zh_CN/Release.md b/docs/zh_CN/Release.md
index b4ba5a1b84..8a65ee765e 100644
--- a/docs/zh_CN/Release.md
+++ b/docs/zh_CN/Release.md
@@ -246,7 +246,7 @@
 
 * 修复了在某些极端条件下，不能正确存储任务的取消状态。
 * 修复在使用 SMAC Tuner 时，解析搜索空间的错误。
-* 修复 CIFAR-10 样例中的 broken pipe 问题。
+* 修复 CIFAR-10 示例中的 broken pipe 问题。
 * 为本地训练和 NNI 管理器添加单元测试。
 * 为远程服务器、OpenPAI 和 Kubeflow 训练平台在 Azure 中增加集成测试。
 * 在 OpenPAI 客户端中支持 Pylon 路径。
@@ -284,7 +284,7 @@
 * [FrameworkController 训练平台](TrainingService/FrameworkControllerMode.md)：支持使用在 Kubernetes 上使用 FrameworkController 运行。 
   * FrameworkController 是 Kubernetes 上非常通用的控制器（Controller），能用来运行基于各种机器学习框架的分布式作业，如 TensorFlow，Pytorch， MXNet 等。
   * NNI 为作业定义了统一而简单的规范。
-  * 如何使用 FrameworkController 的 MNIST 样例。
+  * 如何使用 FrameworkController 的 MNIST 示例。
 
 #### 改进用户体验
 
@@ -324,7 +324,7 @@
 ### 新示例
 
 * [FashionMnist](https://github.com/microsoft/nni/tree/master/examples/trials/network_morphism)，使用 network morphism Tuner
-* 使用 PyTorch 的[分布式 MNIST 样例](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-distributed-pytorch)
+* 使用 PyTorch 的[分布式 MNIST 示例](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-distributed-pytorch)
 
 ## 发布 0.4 - 12/6/2018
 
@@ -332,7 +332,7 @@
 
 * [Kubeflow 训练平台](TrainingService/KubeflowMode.md) 
   * 支持 tf-operator
-  * 使用 Kubeflow 的[分布式 Trial 样例](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-distributed/dist_mnist.py)
+  * 使用 Kubeflow 的[分布式 Trial 示例](https://github.com/microsoft/nni/tree/master/examples/trials/mnist-distributed/dist_mnist.py)
 * [遍历搜索 Tuner](Tuner/GridsearchTuner.md)
 * [Hyperband Tuner](Tuner/HyperbandAdvisor.md)
 * 支持在 MAC 上运行 NNI Experiment
@@ -372,7 +372,7 @@
 
 ### API 的新功能和更新
 
-* <span style="color:red"><strong>不兼容的改动</strong></span>：nn.get_parameters() 改为 nni.get_next_parameter。 所有以前版本的样例将无法在 v0.3 上运行，需要重新克隆 NNI 代码库获取新样例。 如果在自己的代码中使用了 NNI，也需要相应的更新。
+* <span style="color:red"><strong>不兼容的改动</strong></span>：nn.get_parameters() 改为 nni.get_next_parameter。 所有以前版本的示例将无法在 v0.3 上运行，需要重新克隆 NNI 代码库获取新示例。 如果在自己的代码中使用了 NNI，也需要相应的更新。
 
 * 新 API **nni.get_sequence_id()**。 每个 Trial 任务都会被分配一个唯一的序列数字，可通过 nni.get_sequence_id() API 来获取。
   
@@ -400,9 +400,9 @@
   docker pull msranni/nni:latest
   ```
 
-* 新的 Trial 样例：[NNI Sklearn 样例](https://github.com/microsoft/nni/tree/master/examples/trials/sklearn)
+* 新的 Trial 示例：[NNI Sklearn 示例](https://github.com/microsoft/nni/tree/master/examples/trials/sklearn)
 
-* 新的竞赛样例：[Kaggle Competition TGS Salt](https://github.com/microsoft/nni/tree/master/examples/trials/kaggle-tgs-salt)
+* 新的竞赛示例：[Kaggle Competition TGS Salt](https://github.com/microsoft/nni/tree/master/examples/trials/kaggle-tgs-salt)
 
 ### 其它
 
@@ -420,7 +420,7 @@
   * [SMAC](https://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf) 基于 Sequential Model-Based Optimization (SMBO). 它会利用使用过的结果好的模型（高斯随机过程模型），并将随机森林引入到 SMBO 中，来处理分类参数。 NNI 的 SMAC 通过包装 [SMAC3](https://github.com/automl/SMAC3) 来支持。
 * 支持将 NNI 安装在 [conda](https://conda.io/docs/index.html) 和 Python 虚拟环境中。
 * 其它 
-  * 更新 ga squad 样例与相关文档
+  * 更新 ga squad 示例与相关文档
   * 用户体验改善及 Bug 修复
 
 ## 发布 0.1.0 - 9/10/2018 (首个版本)
diff --git a/docs/zh_CN/TrainingService/FrameworkControllerMode.md b/docs/zh_CN/TrainingService/FrameworkControllerMode.md
index 4c964d1e9b..1e45a858e4 100644
--- a/docs/zh_CN/TrainingService/FrameworkControllerMode.md
+++ b/docs/zh_CN/TrainingService/FrameworkControllerMode.md
@@ -32,7 +32,7 @@
 
 参考[Kubeflow 训练平台](KubeflowMode.md)的设计，FrameworkController 训练平台与其类似。
 
-## 样例
+## 示例
 
 FrameworkController 配置文件的格式如下：
 
@@ -93,7 +93,7 @@ frameworkcontrollerConfig:
 
 注意：如果用 FrameworkController 模式运行，需要在 YAML 文件中显式设置 `trainingServicePlatform: frameworkcontroller`。
 
-FrameworkController 模式的 Trial 配置格式，是 FrameworkController 官方配置的简化版。参考 [frameworkcontroller 的 tensorflow 样例](https://github.com/Microsoft/frameworkcontroller/blob/master/example/framework/scenario/tensorflow/cpu/tensorflowdistributedtrainingwithcpu.yaml) 了解详情。
+FrameworkController 模式的 Trial 配置格式，是 FrameworkController 官方配置的简化版。参考 [frameworkcontroller 的 tensorflow 示例](https://github.com/Microsoft/frameworkcontroller/blob/master/example/framework/scenario/tensorflow/cpu/tensorflowdistributedtrainingwithcpu.yaml) 了解详情。
 
 frameworkcontroller 模式中的 Trial 配置使用以下主键：
 
diff --git a/docs/zh_CN/TrainingService/LocalMode.md b/docs/zh_CN/TrainingService/LocalMode.md
index 0268e66b34..a526bbd210 100644
--- a/docs/zh_CN/TrainingService/LocalMode.md
+++ b/docs/zh_CN/TrainingService/LocalMode.md
@@ -1,6 +1,6 @@
 # **教程：使用 NNI API 在本地创建和运行 Experiment**
 
-本教程会使用 [~/examples/trials/mnist-tfv1] 样例来解释如何在本地使用 NNI API 来创建并运行 Experiment。
+本教程会使用 [~/examples/trials/mnist-tfv1] 示例来解释如何在本地使用 NNI API 来创建并运行 Experiment。
 
 > 在开始前
 
@@ -78,9 +78,9 @@
 
 **准备 Trial**：
 
-> 在克隆代码后，可以在 ~/nni/examples 中找到一些样例，运行 `ls examples/trials` 查看所有 Trial 样例。
+> 在克隆代码后，可以在 ~/nni/examples 中找到一些示例，运行 `ls examples/trials` 查看所有 Trial 示例。
 
-先从 NNI 提供的简单 Trial 样例，如 MNIST 开始。 NNI 样例在代码目录的 examples 中，运行 `ls ~/nni/examples/trials` 可以看到所有 Experiment 的样例。 执行下面的命令可轻松运行 NNI 的 mnist 样例：
+先从 NNI 提供的简单 Trial 示例，如 MNIST 开始。 NNI 示例在代码目录的 examples 中，运行 `ls ~/nni/examples/trials` 可以看到所有 Experiment 的示例。 执行下面的命令可轻松运行 NNI 的 mnist 示例：
 
       python ~/nni/examples/trials/mnist-annotation/mnist.py
     
@@ -97,7 +97,7 @@
 
 *builtinTunerName* 用来指定 NNI 中的 Tuner，*classArgs* 是传入到 Tuner的参数（内置 Tuner 在[这里](../Tuner/BuiltinTuner.md)），*optimization_mode* 表明需要最大化还是最小化 Trial 的结果。
 
-**准备配置文件**：实现 Trial 的代码，并选择或实现自定义的 Tuner 后，就要准备 YAML 配置文件了。 NNI 为每个 Trial 样例都提供了演示的配置文件，用命令`cat ~/nni/examples/trials/mnist-annotation/config.yml` 来查看其内容。 大致内容如下：
+**准备配置文件**：实现 Trial 的代码，并选择或实现自定义的 Tuner 后，就要准备 YAML 配置文件了。 NNI 为每个 Trial 示例都提供了演示的配置文件，用命令`cat ~/nni/examples/trials/mnist-annotation/config.yml` 来查看其内容。 大致内容如下：
 
 ```yaml
 authorName: your_name
diff --git a/docs/zh_CN/TrainingService/PaiMode.md b/docs/zh_CN/TrainingService/PaiMode.md
index 0191223f14..019ba21737 100644
--- a/docs/zh_CN/TrainingService/PaiMode.md
+++ b/docs/zh_CN/TrainingService/PaiMode.md
@@ -36,10 +36,13 @@ trial:
   cpuNum: 1
   memoryMB: 8196
   image: msranni/nni:latest
-# 配置访问的 OpenPAI 集群
+  virtualCluster: default
+  nniManagerNFSMountPath: /home/user/mnt
+  containerNFSMountPath: /mnt/data/user
+# 配置要访问的 OpenPAI 集群
 paiConfig:
   userName: your_pai_nni_user
-  passWord: your_pai_password
+  token: your_pai_token
   host: 10.1.1.1
 ```
 
@@ -56,56 +59,12 @@ paiConfig:
     * [Docker Hub](https://hub.docker.com/) 上有预制的 NNI Docker 映像 [nnimsra/nni](https://hub.docker.com/r/msranni/nni/)。 它包含了用来启动 NNI Experiment 所依赖的所有 Python 包，Node 模块和 JavaScript。 生成此 Docker 映像的文件在[这里](https://github.com/Microsoft/nni/tree/master/deployment/docker/Dockerfile)。 可以直接使用此映像，或参考它来生成自己的映像。
 * virtualCluster 
     * 可选。 设置 OpenPAI 的 virtualCluster，即虚拟集群。 如果未设置此参数，将使用默认（default）虚拟集群。
-* shmMB 
-    * 可选。 设置 OpenPAI 的 shmMB，即 Docker 中的共享内存。
-* authFile 
-    * 可选。在使用 pai 模式时，为私有 Docker 仓库设置认证文件，[见参考文档](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job)。提供 authFile 的本地路径即可， NNI 会上传此文件。
-
-* portList
-    
-    * 可选。 设置 OpenPAI 的 portList。指定了容器中使用的端口列表，[参考文档](https://github.com/microsoft/pai/blob/b2324866d0280a2d22958717ea6025740f71b9f0/docs/job_tutorial.md#specification)。  
-        示例如下：
-        portList:
-          - label: test
-            beginAt: 8080
-            portNumber: 2
-        
-    
-    假设需要在 MNIST 示例中使用端口来运行 TensorBoard。 第一步是编写 `mnist.py` 的包装脚本 `launch_pai.sh`。
-    
-    ```bash
-    export TENSORBOARD_PORT=PAI_PORT_LIST_${PAI_CURRENT_TASK_ROLE_NAME}_0_tensorboard
-    tensorboard --logdir . --port ${!TENSORBOARD_PORT} &
-    python3 mnist.py
-    ```
-    
-    portList 的配置部分如下：
-    
-    ```yaml
-    trial:
-    command: bash launch_pai.sh
-    portList:
-      - label: tensorboard
-        beginAt: 0
-        portNumber: 1
-    ```
-
-NNI 支持 OpenPAI 中的两种认证授权方法，即密码和 Token，[参考](https://github.com/microsoft/pai/blob/b6bd2ab1c8890f91b7ac5859743274d2aa923c22/docs/rest-server/API.md#2-authentication)。 认证在 `paiConfig` 字段中配置。   
-密码认证的 `paiConfig` 配置如下：
-
-    paiConfig:
-      userName: your_pai_nni_user
-      passWord: your_pai_password
-      host: 10.1.1.1
-    
-
-Token 认证的 `paiConfig` 配置如下：
-
-    paiConfig:
-      userName: your_pai_nni_user
-      token: your_pai_token
-      host: 10.1.1.1
-    
+* nniManagerNFSMountPath 
+    * 必填。 在 nniManager 计算机上设置挂载的路径。
+* containerNFSMountPath 
+    * 必填。 在 OpenPAI 的容器中设置挂载路径。
+* paiStoragePlugin 
+    * 必填。 设置 PAI 中使用的存储插件的名称。
 
 完成并保存 NNI Experiment 配置文件后（例如可保存为：exp_pai.yml），运行以下命令：
 
@@ -126,9 +85,7 @@ Token 认证的 `paiConfig` 配置如下：
 
 ## 数据管理
 
-如果训练数据集不大，可放在 codeDir 中，NNI会将其上传到 HDFS，或者构建 Docker 映像来包含数据。 如果数据集非常大，则不可放在 codeDir 中，可参考此[指南](https://github.com/microsoft/pai/blob/master/docs/user/storage.md)来将数据目录挂载到容器中。
-
-如果要将 Trial 的其它输出保存到 HDFS 上，如模型文件等，需要在 Trial 代码中使用 `NNI_OUTPUT_DIR` 来保存输出文件。NNI 的 SDK 会将文件从 Trial 容器的 `NNI_OUTPUT_DIR` 复制到 HDFS 上，目标路径为：`hdfs://host:port/{username}/nni/{experiments}/{experimentId}/trials/{trialId}/nnioutput`。
+使用 NNI 启动 Experiment 前，应在 nniManager 计算机中设置相应的挂载数据的路径。 OpenPAI 有自己的存储（NFS、AzureBlob ...），在 PAI 中使用的存储将在启动作业时挂载到容器中。 应通过 `paiStoragePlugin` 字段选择 OpenPAI 中的存储类型。 然后，应将存储挂载到 nniManager 计算机上，并在配置文件中设置 `nniManagerNFSMountPath`，NNI会生成 bash 文件并将 `codeDir` 中的数据拷贝到 `nniManagerNFSMountPath` 文件夹中，然后启动 Trial 任务。 `nniManagerNFSMountPath` 中的数据会同步到 OpenPAI 存储中，并挂载到 OpenPAI 的容器中。 容器中的数据路径在 `containerNFSMountPath` 设置，NNI 将进入该文件夹，运行脚本启动 Trial 任务。
 
 ## 版本校验
 
diff --git a/docs/zh_CN/TrainingService/PaiYarnMode.md b/docs/zh_CN/TrainingService/PaiYarnMode.md
new file mode 100644
index 0000000000..c84debfa55
--- /dev/null
+++ b/docs/zh_CN/TrainingService/PaiYarnMode.md
@@ -0,0 +1,128 @@
+**在 OpenPAIYarn 上运行 Experiment**
+===
+原始的 `pai` 模式改为了 `paiYarn` 模式，这是基于 Yarn 的分布式训练平台。
+
+## 设置环境
+参考[指南](../Tutorial/QuickStart.md)安装 NNI。
+
+## 运行 Experiment
+以 `examples/trials/mnist-annotation` 为例。 NNI 的 YAML 配置文件如下：
+
+```yaml
+authorName: your_name
+experimentName: auto_mnist
+# 并发运行的 Trial 数量
+trialConcurrency: 2
+# Experiment 的最长持续运行时间
+maxExecDuration: 3h
+# 空表示一直运行
+maxTrialNum: 100
+# 可选项: local, remote, pai, paiYarn
+trainingServicePlatform: paiYarn
+# 搜索空间文件
+searchSpacePath: search_space.json
+# 可选项: true, false
+useAnnotation: true
+tuner:
+  builtinTunerName: TPE
+  classArgs:
+    optimize_mode: maximize
+trial:
+  command: python3 mnist.py
+  codeDir: ~/nni/examples/trials/mnist-annotation
+  gpuNum: 0
+  cpuNum: 1
+  memoryMB: 8196
+  image: msranni/nni:latest
+# 配置访问的 OpenpaiYarn 集群
+paiYarnConfig:
+  userName: your_paiYarn_nni_user
+  passWord: your_paiYarn_password
+  host: 10.1.1.1
+```
+
+注意：如果用 paiYarn 模式运行，需要在 YAML 文件中设置 `trainingServicePlatform: paiYarn`。
+
+与[本机模式](LocalMode.md)，以及[远程计算机模式](RemoteMachineMode.md)相比，paiYarn 模式的 Trial 有额外的配置：
+* cpuNum
+    * 必填。 Trial 程序的 CPU 需求，必须为正数。
+* memoryMB
+    * 必填。 Trial 程序的内存需求，必须为正数。
+* image
+    * 必填。 在 paiYarn 模式中，Trial 程序由 OpenpaiYarn 在 [Docker 容器](https://www.docker.com/)中安排运行。 此键用来指定 Trial 程序的容器使用的 Docker 映像。
+    * [Docker Hub](https://hub.docker.com/) 上有预制的 NNI Docker 映像 [msranni/nni](https://hub.docker.com/r/msranni/nni/)。 它包含了用来启动 NNI Experiment 所依赖的所有 Python 包，Node 模块和 JavaScript。 生成此 Docker 映像的文件在[这里](https://github.com/Microsoft/nni/tree/master/deployment/docker/Dockerfile)。 可以直接使用此映像，或参考它来生成自己的映像。
+* virtualCluster
+    * 可选。 设置 OpenPAIYarn 的 virtualCluster，即虚拟集群。 如果未设置此参数，将使用默认（default）虚拟集群。
+* shmMB
+    * 可选。 设置 OpenPAIYarn 的 shmMB，即 Docker 中的共享内存。
+* authFile
+    * 可选。在使用 paiYarn 模式时，为私有 Docker 仓库设置认证文件，[见参考文档](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job)。提供 authFile 的本地路径即可， NNI 会上传此文件。
+* portList
+    * 可选。 设置 OpenPAIYarn 的 portList。指定了容器中使用的端口列表，[参考文档](https://github.com/microsoft/pai/blob/b2324866d0280a2d22958717ea6025740f71b9f0/docs/job_tutorial.md#specification)。<br /> NNI 中的配置示例如下：
+    ```
+    portList:
+      - label: test
+        beginAt: 8080
+        portNumber: 2
+    ```
+    假设需要在 MNIST 示例中使用端口来运行 TensorBoard。 第一步是编写 `mnist.py` 的包装脚本 `launch_paiYarn.sh`。
+
+    ```bash
+    export TENSORBOARD_PORT=PAI_PORT_LIST_${PAI_CURRENT_TASK_ROLE_NAME}_0_tensorboard
+    tensorboard --logdir . --port ${!TENSORBOARD_PORT} &
+    python3 mnist.py
+    ```
+    portList 的配置部分如下：
+
+    ```yaml
+    trial:
+      command: bash launch_paiYarn.sh
+      portList:
+        - label: tensorboard
+          beginAt: 0
+          portNumber: 1
+    ```
+
+NNI 支持 OpenPAIYarn 中的两种认证授权方法，即密码和 paiYarn Token，[参考](https://github.com/microsoft/pai/blob/b6bd2ab1c8890f91b7ac5859743274d2aa923c22/docs/rest-server/API.md#2-authentication)。 授权配置在 `paiYarnConfig` 字段中。 密码认证的 `paiYarnConfig` 配置如下：
+```
+paiYarnConfig:
+  userName: your_paiYarn_nni_user
+  passWord: your_paiYarn_password
+  host: 10.1.1.1
+```
+Token 认证的 `paiYarnConfig` 配置如下：
+```
+paiYarnConfig:
+  userName: your_paiYarn_nni_user
+  token: your_paiYarn_token
+  host: 10.1.1.1
+```
+
+完成并保存 NNI Experiment 配置文件后（例如可保存为：exp_paiYarn.yml），运行以下命令：
+```
+nnictl create --config exp_paiYarn.yml
+```
+即可在 paiYarn 模式下启动 Experiment。 NNI 会为每个 Trial 创建 OpenPAIYarn 作业，作业名称的格式为 `nni_exp_{experiment_id}_trial_{trial_id}`。 可以在 OpenPAIYarn 集群的网站中看到 NNI 创建的作业，例如： ![](../../img/nni_pai_joblist.jpg)
+
+注意：paiYarn 模式下，NNIManager 会启动 RESTful 服务，监听端口为 NNI 网页服务器的端口加1。 例如，如果网页端口为`8080`，那么 RESTful 服务器会监听在 `8081`端口，来接收运行在 OpenPAIYarn 中的 Trial 作业的指标。 因此，需要在防火墙中启用端口 `8081` 的 TCP 协议，以允许传入流量。
+
+当一个 Trial 作业完成后，可以在 NNI 网页的概述页面（如：http://localhost:8080/oview）中查看 Trial 的信息。
+
+在 Trial 列表页面中展开 Trial 信息，点击如下的 logPath： ![](../../img/nni_webui_joblist.jpg)
+
+接着将会打开 HDFS 的 WEB 界面，并浏览到 Trial 的输出文件： ![](../../img/nni_trial_hdfs_output.jpg)
+
+在输出目录中可以看到三个文件：stderr, stdout, 以及 trial.log
+
+## 数据管理
+如果训练数据集不大，可放在 codeDir 中，NNI 会将其上传到 HDFS，或者构建 Docker 映像来包含数据。 如果数据集非常大，则不可放在 codeDir 中，可参考此[指南](https://github.com/microsoft/pai/blob/master/docs/user/storage.md)来将数据目录挂载到容器中。
+
+如果要将 Trial 的其它输出保存到 HDFS 上，如模型文件等，需要在 Trial 代码中使用 `NNI_OUTPUT_DIR` 来保存输出文件。NNI 的 SDK 会将文件从 Trial 容器的 `NNI_OUTPUT_DIR` 复制到 HDFS 上，目标路径为：`hdfs://host:port/{username}/nni/{experiments}/{experimentId}/trials/{trialId}/nnioutput`。
+
+## 版本校验
+从 0.6 开始，NNI 支持版本校验。确保 NNIManager 与 trialKeeper 的版本一致，避免兼容性错误。 检查策略：
+1. 0.6 以前的 NNIManager 可与任何版本的 trialKeeper 一起运行，trialKeeper 支持向后兼容。
+2. 从 NNIManager 0.6 开始，与 trialKeeper 的版本必须一致。 例如，如果 NNIManager 是 0.6 版，则 trialKeeper 也必须是 0.6 版。
+3. 注意，只有版本的前两位数字才会被检查。例如，NNIManager 0.6.1 可以和 trialKeeper 的 0.6 或 0.6.2 一起使用，但不能与 trialKeeper 的 0.5.1 或 0.7 版本一起使用。
+
+如果 Experiment 无法运行，而且不能确认是否是因为版本不匹配造成的，可以在 Web 界面检查是否有相关的错误消息。 ![](../../img/version_check.png)
diff --git a/docs/zh_CN/TrialExample/Cifar10Examples.md b/docs/zh_CN/TrialExample/Cifar10Examples.md
index f4118f71a9..76907be16a 100644
--- a/docs/zh_CN/TrialExample/Cifar10Examples.md
+++ b/docs/zh_CN/TrialExample/Cifar10Examples.md
@@ -1,4 +1,4 @@
-# CIFAR-10 样例
+# CIFAR-10 示例
 
 ## 概述
 
@@ -16,9 +16,9 @@
 
 #### 准备
 
-此样例需要安装 PyTorch。 PyTorch 安装包需要选择所基于的 Python 和 CUDA 版本。
+此示例需要安装 PyTorch。 PyTorch 安装包需要选择所基于的 Python 和 CUDA 版本。
 
-这是环境 python==3.5 且 cuda == 8.0 的样例，然后用下列命令来安装 [ PyTorch](https://pytorch.org/)：
+这是环境 python==3.5 且 cuda == 8.0 的示例，然后用下列命令来安装 [ PyTorch](https://pytorch.org/)：
 
 ```bash
 python3 -m pip install http://download.pytorch.org/whl/cu80/torch-0.4.1-cp35-cp35m-linux_x86_64.whl
@@ -55,15 +55,15 @@ python3 -m pip install torchvision
 
 **配置**
 
-这是在本机运行 Experiment 的样例（多GPU）：
+这是在本机运行 Experiment 的示例（多GPU）：
 
 代码：[examples/trials/cifar10_pytorch/config.yml](https://github.com/Microsoft/nni/blob/master/examples/trials/cifar10_pytorch/config.yml)
 
-这是在 OpenPAI 上运行 Experiment 的样例：
+这是在 OpenPAI 上运行 Experiment 的示例：
 
 代码：[examples/trials/cifar10_pytorch/config_pai.yml](https://github.com/Microsoft/nni/blob/master/examples/trials/cifar10_pytorch/config_pai.yml)
 
-*完整样例：[examples/trials/cifar10_pytorch/](https://github.com/Microsoft/nni/tree/master/examples/trials/cifar10_pytorch)*
+*完整示例：[examples/trials/cifar10_pytorch/](https://github.com/Microsoft/nni/tree/master/examples/trials/cifar10_pytorch)*
 
 #### 运行 Experiment
 
diff --git a/docs/zh_CN/TrialExample/GbdtExample.md b/docs/zh_CN/TrialExample/GbdtExample.md
index 6467d03213..692528b6d5 100644
--- a/docs/zh_CN/TrialExample/GbdtExample.md
+++ b/docs/zh_CN/TrialExample/GbdtExample.md
@@ -168,7 +168,7 @@ if __name__ == '__main__':
 * 路径设置：`searchSpacePath`, `trial codeDir`，等等。
 * 算法设置：选择 `Tuner` 算法，`优化方向`，等等。
 
-config.yml 样例：
+config.yml 示例：
 
 ```yaml
 authorName: default
diff --git a/docs/zh_CN/TrialExample/MnistExamples.md b/docs/zh_CN/TrialExample/MnistExamples.md
index 04eeddd20d..79647528a5 100644
--- a/docs/zh_CN/TrialExample/MnistExamples.md
+++ b/docs/zh_CN/TrialExample/MnistExamples.md
@@ -1,6 +1,6 @@
-# MNIST 样例
+# MNIST 示例
 
-在深度学习中，用 CNN 来分类 MNIST 数据，就像介绍编程语言中的 `hello world` 样例。 因此，NNI 将 MNIST 作为样例来介绍功能。 样例如下：
+在深度学习中，用 CNN 来分类 MNIST 数据，就像介绍编程语言中的 `hello world` 示例。 因此，NNI 将 MNIST 作为示例来介绍功能。 示例如下：
 
 - [MNIST 中使用 NNI API (TensorFlow v1.x)](#mnist-tfv1)
 - [MNIST 中使用 NNI API (TensorFlow v2.x)](#mnist-tfv2)
@@ -15,7 +15,7 @@
 <a name="mnist-tfv1"></a>
 **MNIST 中使用 NNI API (TensorFlow v1.x)**
 
-这是个简单的卷积网络，有两个卷积层，两个池化层和一个全连接层。 调优的超参包括 dropout 比率，卷积层大小，隐藏层（全连接层）大小等等。 它能用 NNI 中大部分内置的 Tuner 来调优，如 TPE，SMAC，Random。 样例的 YAML 文件也启用了评估器来提前终止一些中间结果不好的尝试。
+这是个简单的卷积网络，有两个卷积层，两个池化层和一个全连接层。 调优的超参包括 dropout 比率，卷积层大小，隐藏层（全连接层）大小等等。 它能用 NNI 中大部分内置的 Tuner 来调优，如 TPE，SMAC，Random。 示例的 YAML 文件也启用了评估器来提前终止一些中间结果不好的尝试。
 
 `代码目录: examples/trials/mnist-tfv1/`
 
@@ -29,48 +29,48 @@
 <a name="mnist-annotation"></a>
 **MNIST 中使用 NNI 标记（annotation）**
 
-此样例与上例类似，上例使用的是 NNI API 来指定搜索空间并返回结果，而此例使用的是 NNI 标记。
+此示例与上例类似，上例使用的是 NNI API 来指定搜索空间并返回结果，而此例使用的是 NNI 标记。
 
 `代码目录: examples/trials/mnist-annotation/`
 
 <a name="mnist-keras"></a>
 **在 Keras 中使用 MNIST**
 
-此样例由 Keras 实现。 这也是 MNIST 数据集的网络，包括两个卷积层，一个池化层和两个全连接层。
+此示例由 Keras 实现。 这也是 MNIST 数据集的网络，包括两个卷积层，一个池化层和两个全连接层。
 
 `代码目录: examples/trials/mnist-keras/`
 
 <a name="mnist-batch"></a>
 **MNIST -- 用批处理 Tuner 来调优**
 
-此样例演示了如何使用批处理 Tuner。 只需要在搜索空间文件中列出所有要尝试的配置， NNI 会逐个尝试。
+此示例演示了如何使用批处理 Tuner。 只需要在搜索空间文件中列出所有要尝试的配置， NNI 会逐个尝试。
 
 `代码目录: examples/trials/mnist-batch-tune-keras/`
 
 <a name="mnist-hyperband"></a>
 **MNIST -- 用 hyperband 调优**
 
-此样例演示了如何使用 hyperband 来调优模型。 在尝试收到的配置中，有个主键叫做 `STEPS`，尝试要用它来控制运行多长时间（例如，控制迭代的次数）。
+此示例演示了如何使用 hyperband 来调优模型。 在尝试收到的配置中，有个主键叫做 `STEPS`，尝试要用它来控制运行多长时间（例如，控制迭代的次数）。
 
 `代码目录: examples/trials/mnist-hyperband/`
 
 <a name="mnist-nested"></a>
 **MNIST -- 用嵌套搜索空间调优**
 
-此样例演示了 NNI 如何支持嵌套的搜索空间。 搜索空间文件示了如何定义嵌套的搜索空间。
+此示例演示了 NNI 如何支持嵌套的搜索空间。 搜索空间文件示了如何定义嵌套的搜索空间。
 
 `代码目录: examples/trials/mnist-nested-search-space/`
 
 <a name="mnist-kubeflow-tf"></a>
 **用 Kubeflow 运行分布式的 MNIST (tensorflow)**
 
-此样例展示了如何通过 NNI 来在 Kubeflow 上运行分布式训练。 只需要简单的提供分布式训练代码，并在配置文件中指定 kubeflow 模式。 例如，运行 ps 和 worker 的命令行，以及各自需要的资源。 此样例使用了 Tensorflow 来实现，因而，需要使用 Kubeflow 的 tf-operator。
+此示例展示了如何通过 NNI 来在 Kubeflow 上运行分布式训练。 只需要简单的提供分布式训练代码，并在配置文件中指定 kubeflow 模式。 例如，运行 ps 和 worker 的命令行，以及各自需要的资源。 此示例使用了 Tensorflow 来实现，因而，需要使用 Kubeflow 的 tf-operator。
 
 `代码目录: examples/trials/mnist-distributed/`
 
 <a name="mnist-kubeflow-pytorch"></a>
 **用 Kubeflow 运行分布式的 MNIST (PyTorch)**
 
-与前面的样例类似，不同之处是此样例是 Pytorch 实现的，因而需要使用 Kubeflow 的 pytorch-operator。
+与前面的示例类似，不同之处是此示例是 Pytorch 实现的，因而需要使用 Kubeflow 的 pytorch-operator。
 
 `代码目录: examples/trials/mnist-distributed-pytorch/`
\ No newline at end of file
diff --git a/docs/zh_CN/TrialExample/SklearnExamples.md b/docs/zh_CN/TrialExample/SklearnExamples.md
index 41244e5438..36f9b6fa67 100644
--- a/docs/zh_CN/TrialExample/SklearnExamples.md
+++ b/docs/zh_CN/TrialExample/SklearnExamples.md
@@ -4,7 +4,7 @@
 
 NNI 支持多种调优算法来为 scikit-learn 搜索最好的模型和超参，并支持本机、远程服务器和云服务等多种环境。
 
-## 1. 如何运行此样例
+## 1. 如何运行此示例
 
 安装 NNI 包，并使用命令行工具 `nnictl` 来启动 Experiment。 有关安装和环境准备的内容，参考[这里](../Tutorial/QuickStart.md)。
 
@@ -14,7 +14,7 @@ NNI 支持多种调优算法来为 scikit-learn 搜索最好的模型和超参
 nnictl create --config ./config.yml
 ```
 
-## 2. 样例概述
+## 2. 示例概述
 
 ### 2.1 分类
 
@@ -24,7 +24,7 @@ nnictl create --config ./config.yml
 
 ### 2.2 回归
 
-此样例使用了波士顿房价数据，数据集由波士顿各地区房价所组成，还包括了房屋的周边信息，例如：犯罪率 (CRIM)，非零售业务的面积 (INDUS)，房主年龄 (AGE) 等等。这些信息可用来预测波士顿的房价。
+此示例使用了波士顿房价数据，数据集由波士顿各地区房价所组成，还包括了房屋的周边信息，例如：犯罪率 (CRIM)，非零售业务的面积 (INDUS)，房主年龄 (AGE) 等等。这些信息可用来预测波士顿的房价。
 
 本例中，尝试了不同的回归模型，包括 `"LinearRegression", "SVR", "KNeighborsRegressor", "DecisionTreeRegressor"` 和一些参数，如 `"svr_kernel", "knr_weights"`。 关于这些模型算法和参数的更多信息，可参考[这里](https://scikit-learn.org/stable/supervised_learning.html#supervised-learning)。
 
diff --git a/docs/zh_CN/TrialExample/SquadEvolutionExamples.md b/docs/zh_CN/TrialExample/SquadEvolutionExamples.md
index 89e7165227..d512401d08 100644
--- a/docs/zh_CN/TrialExample/SquadEvolutionExamples.md
+++ b/docs/zh_CN/TrialExample/SquadEvolutionExamples.md
@@ -1,6 +1,6 @@
 # 在阅读理解上使用自动模型架构搜索
 
-该样例展示了如何使用遗传算法为阅读理解任务找到好的模型架构。
+该示例展示了如何使用遗传算法为阅读理解任务找到好的模型架构。
 
 ## 1. 搜索空间
 
@@ -20,7 +20,7 @@
 
 另一个时间更快，性能更好的版本正在开发中。 很快将发布。
 
-## 2. 如何在本机运行此样例？
+## 2. 如何在本机运行此示例？
 
 ### 2.1 使用下载脚本来下载数据
 
@@ -81,7 +81,7 @@ trial:
 nnictl create --config ~/nni/examples/trials/ga_squad/config.yml
 ```
 
-## 3 在 OpenPAI 上运行此样例
+## 3 在 OpenPAI 上运行此示例
 
 根据上传大小的限制，仅上传源代码，并在训练过程中下载数据。 本 Experiment 需要的内存 `memoryMB >= 32G`，训练过程可能需要数小时。
 
@@ -140,7 +140,7 @@ nnictl create --config ~/nni/examples/trials/ga_squad/config_pai.yml
 
 ### 4.1 实现方法
 
-基于进化算法架构的问答和其它样例一样，有两个部分：Trial 和 Tuner。
+基于进化算法架构的问答和其它示例一样，有两个部分：Trial 和 Tuner。
 
 ### 4.2 Trial
 
@@ -251,7 +251,7 @@ class CustomerTuner(Tuner):
 
 ### 4.4 模型配置格式
 
-这是模型配置的样例，在架构搜索过程中，从 Tuner 传入 Trial 的代码。
+这是模型配置的示例，在架构搜索过程中，从 Tuner 传入 Trial 的代码。
 
 ```json
 {
diff --git a/docs/zh_CN/TrialExample/Trials.md b/docs/zh_CN/TrialExample/Trials.md
index 18d4dc6235..510226c180 100644
--- a/docs/zh_CN/TrialExample/Trials.md
+++ b/docs/zh_CN/TrialExample/Trials.md
@@ -2,7 +2,7 @@
 
 **Trial（尝试）**是将一组参数组合（例如，超参）在模型上独立的一次尝试。
 
-定义 NNI 的 Trial，需要首先定义参数组，并更新模型代码。 NNI 有两种方法来实现 Trial：[NNI API](#nni-api) 以及 [NNI Python annotation](#nni-annotation)。 参考[这里的](#more-examples)更多 Trial 样例。
+定义 NNI 的 Trial，需要首先定义参数组，并更新模型代码。 NNI 有两种方法来实现 Trial：[NNI API](#nni-api) 以及 [NNI Python annotation](#nni-annotation)。 参考[这里的](#more-examples)更多 Trial 示例。
 
 <a name="nni-api"></a>
 
@@ -10,7 +10,7 @@
 
 ### 第一步：准备搜索空间参数文件。
 
-样例如下：
+示例如下：
 
 ```json
 {
@@ -156,7 +156,7 @@ nni.get_sequence_id＃返回 0
 
 每个 Trial 都有单独的目录来输出自己的数据。 在每次 Trial 运行后，环境变量 `NNI_OUTPUT_DIR` 定义的目录都会被导出。 在这个目录中可以看到 Trial 的代码、数据和日志。 此外，Trial 的日志（包括 stdout）还会被重定向到此目录中的 `trial.log` 文件。
 
-如果使用了 Annotation 方法，转换后的 Trial 代码会存放在另一个临时目录中。 可以在 `run.sh` 文件中的 `NNI_OUTPUT_DIR` 变量找到此目录。 文件中的第二行（即：`cd`）会切换到代码所在的实际路径。 参考 `run.sh` 文件样例：
+如果使用了 Annotation 方法，转换后的 Trial 代码会存放在另一个临时目录中。 可以在 `run.sh` 文件中的 `NNI_OUTPUT_DIR` 变量找到此目录。 文件中的第二行（即：`cd`）会切换到代码所在的实际路径。 参考 `run.sh` 文件示例：
 
 ```bash
 #!/bin/bash
@@ -182,7 +182,7 @@ echo $? `date +%s%3N` >/home/user_name/nni/experiments/$experiment_id$/trials/$t
 
 ## 更多 Trial 的示例
 
-* [MNIST 样例](MnistExamples.md)
+* [MNIST 示例](MnistExamples.md)
 * [为 CIFAR 10 分类找到最佳的 optimizer](Cifar10Examples.md)
 * [如何在 NNI 调优 SciKit-learn 的参数](SklearnExamples.md)
 * [在阅读理解上使用自动模型架构搜索。](SquadEvolutionExamples.md)
diff --git a/docs/zh_CN/Tuner/BuiltinTuner.md b/docs/zh_CN/Tuner/BuiltinTuner.md
index 76f6d694d2..2de9b34741 100644
--- a/docs/zh_CN/Tuner/BuiltinTuner.md
+++ b/docs/zh_CN/Tuner/BuiltinTuner.md
@@ -1,4 +1,4 @@
-# 内置 Tuner
+# 内置的超参调优 Tuner
 
 NNI 提供了先进的调优算法，使用上也很简单。 下面是内置 Tuner 的简单介绍：
 
@@ -13,7 +13,7 @@ NNI 提供了先进的调优算法，使用上也很简单。 下面是内置 Tu
 | [**Anneal（退火算法）**](#Anneal)              | 这种简单的退火算法从先前的采样开始，会越来越靠近发现的最佳点取样。 此算法是随机搜索的简单变体，利用了反应曲面的平滑性。 退火率不是自适应的。                                                                                                                                                                                                                       |
 | [**Naïve Evolution（进化算法）**](#Evolution)  | Naïve Evolution（朴素进化算法）来自于 Large-Scale Evolution of Image Classifiers。 它会基于搜索空间随机生成一个种群。 在每一代中，会选择较好的结果，并对其下一代进行一些变异（例如，改动一个超参，增加或减少一层）。 Naïve Evolution 需要很多次 Trial 才能有效，但它也非常简单，也很容易扩展新功能。 [参考论文](https://arxiv.org/pdf/1703.01041.pdf)                                                     |
 | [**SMAC**](#SMAC)                        | SMAC 基于 Sequential Model-Based Optimization (SMBO，即序列的基于模型优化方法)。 它利用使用过的结果好的模型（高斯随机过程模型），并将随机森林引入到 SMBO 中，来处理分类参数。 SMAC 算法包装了 Github 的 SMAC3。 注意：SMAC 需要通过 `nnictl package` 命令来安装。 [参考论文，](https://www.cs.ubc.ca/~hutter/papers/10-TR-SMAC.pdf) [Github 代码库](https://github.com/automl/SMAC3) |
-| [**Batch Tuner（批量调参器）**](#Batch)         | Batch Tuner 能让用户简单的提供几组配置（如，超参选项的组合）。 当所有配置都执行完后，Experiment 即结束。 Batch Tuner 仅支持 choice 类型。                                                                                                                                                                                                   |
+| [**Batch Tuner（批处理 Tuner）**](#Batch)     | Batch Tuner 能让用户简单的提供几组配置（如，超参选项的组合）。 当所有配置都执行完后，Experiment 即结束。 Batch Tuner 仅支持 choice 类型。                                                                                                                                                                                                   |
 | [**Grid Search（遍历搜索）**](#GridSearch)     | Grid Search 会穷举定义在搜索空间文件中的所有超参组合。 遍历搜索可以使用的类型有 choice, quniform, randint。                                                                                                                                                                                                                     |
 | [**Hyperband**](#Hyperband)              | Hyperband 试图用有限的资源来探索尽可能多的组合，并发现最好的结果。 它的基本思路是生成大量的配置，并使用少量的资源来找到有可能好的配置，然后继续训练找到其中更好的配置。 [参考论文](https://arxiv.org/pdf/1603.06560.pdf)                                                                                                                                                        |
 | [**Network Morphism**](#NetworkMorphism) | Network Morphism 提供了深度学习模型的自动架构搜索功能。 每个子网络都继承于父网络的知识和形态，并变换网络的不同形态，包括深度，宽度，跨层连接（skip-connection）。 然后使用历史的架构和指标，来估计子网络的值。 然后会选择最有希望的模型进行训练。 [参考论文](https://arxiv.org/abs/1806.10282)                                                                                                           |
@@ -26,7 +26,7 @@ NNI 提供了先进的调优算法，使用上也很简单。 下面是内置 Tu
 
 要使用 NNI 内置的 Tuner，需要在 `config.yml` 文件中添加 **builtinTunerName** 和 **classArgs**。 这一节会介绍推荐的场景、参数等详细用法以及示例。
 
-注意：参考样例中的格式来创建新的 `config.yml` 文件。 一些内置的 Tuner 还需要通过 `nnictl package` 命令先安装，如 SMAC。
+注意：参考示例中的格式来创建新的 `config.yml` 文件。 一些内置的 Tuner 还需要通过 `nnictl package` 命令先安装，如 SMAC。
 
 <a name="TPE"></a>
 
@@ -158,6 +158,7 @@ nnictl package install --name=SMAC
 **参数**
 
 * **optimize_mode** (*maximize 或 minimize, 可选项, 默认值为 maximize*) - 如果为 'maximize'，表示 Tuner 的目标是将指标最大化。 如果为 'minimize'，表示 Tuner 的目标是将指标最小化。
+* **config_dedup** (*True 或 False, 可选, 默认为 False*) - 如果为 True，则 Tuner 不会生成重复的配置。 如果为 False，则配置可能会重复生成，但对于相对较大的搜索空间，此概率较小。
 
 **示例**
 
@@ -274,7 +275,7 @@ NetworkMorphism 需要先安装 [PyTorch](https://pytorch.org/get-started/locall
 
 **建议场景**
 
-需要将深度学习方法应用到自己的任务（自己的数据集）上，但不清楚该如何选择或设计网络。 可修改[样例](https://github.com/Microsoft/nni/tree/master/examples/trials/network_morphism/cifar10/cifar10_keras.py)来适配自己的数据集和数据增强方法。 也可以修改批处理大小，学习率或优化器。 它可以为不同的任务找到好的网络架构。 当前，此 Tuner 仅支持视觉领域。 [详细说明](./NetworkmorphismTuner.md)
+需要将深度学习方法应用到自己的任务（自己的数据集）上，但不清楚该如何选择或设计网络。 可修改[示例](https://github.com/Microsoft/nni/tree/master/examples/trials/network_morphism/cifar10/cifar10_keras.py)来适配自己的数据集和数据增强方法。 也可以修改批处理大小，学习率或优化器。 它可以为不同的任务找到好的网络架构。 当前，此 Tuner 仅支持视觉领域。 [详细说明](./NetworkmorphismTuner.md)
 
 **参数**
 
@@ -310,7 +311,7 @@ tuner:
 
 **建议场景**
 
-与 TPE 和 SMAC 类似，Metis 是黑盒 Tuner。 如果系统需要很长时间才能完成一次 Trial，Metis 就比随机搜索等其它方法要更合适。 此外，Metis 还为接下来的 Trial 提供了候选。 如何使用 Metis 的[样例](https://github.com/Microsoft/nni/tree/master/examples/trials/auto-gbdt/search_space_metis.json)。 通过调用 NNI 的 SDK，用户只需要发送`精度`这样的最终结果给 Tuner。 [详细说明](./MetisTuner.md)
+与 TPE 和 SMAC 类似，Metis 是黑盒 Tuner。 如果系统需要很长时间才能完成一次 Trial，Metis 就比随机搜索等其它方法要更合适。 此外，Metis 还为接下来的 Trial 提供了候选。 如何使用 Metis 的[示例](https://github.com/Microsoft/nni/tree/master/examples/trials/auto-gbdt/search_space_metis.json)。 通过调用 NNI 的 SDK，用户只需要发送`精度`这样的最终结果给 Tuner。 [详细说明](./MetisTuner.md)
 
 **参数**
 
@@ -449,4 +450,12 @@ tuner:
   builtinTunerName: PPOTuner
   classArgs:
     optimize_mode: maximize
-```
\ No newline at end of file
+```
+
+## **参考和反馈**
+
+* 在 GitHub 中[提交此功能的 Bug](https://github.com/microsoft/nni/issues/new?template=bug-report.md)；
+* 在 GitHub 中[提交新功能或改进请求](https://github.com/microsoft/nni/issues/new?template=enhancement.md)；
+* 了解 NNI 中[特征工程的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/FeatureEngineering/Overview.md)；
+* 了解 NNI 中 [NAS 的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/NAS/Overview.md)；
+* 了解 NNI 中[模型自动压缩的更多信息](https://github.com/microsoft/nni/blob/master/docs/zh_CN/Compressor/Overview.md)；
\ No newline at end of file
diff --git a/docs/zh_CN/Tuner/CustomizeAdvisor.md b/docs/zh_CN/Tuner/CustomizeAdvisor.md
index 5f0e7e1ed8..152c5dd9c5 100644
--- a/docs/zh_CN/Tuner/CustomizeAdvisor.md
+++ b/docs/zh_CN/Tuner/CustomizeAdvisor.md
@@ -16,7 +16,7 @@ class CustomizedAdvisor(MsgDispatcherBase):
         ...
 ```
 
-**2. 实现所有除了 `handle_request` 外的，以 `handle_` 前缀开始的方法**。 [此文档](https://nni.readthedocs.io/en/latest/sdk_reference.html#nni.msg_dispatcher_base.MsgDispatcherBase)可帮助理解 `MsgDispatcherBase`。
+**2. 实现所有除了 `handle_request` 外的，以 `handle_` 前缀开始的方法**。 [此文档](https://nni.readthedocs.io/zh/latest/sdk_reference.html#nni.msg_dispatcher_base.MsgDispatcherBase)可帮助理解 `MsgDispatcherBase`。
 
 **3. 在 Experiment 的 YAML 文件中配置好自定义的 Advisor。**
 
@@ -33,6 +33,8 @@ advisor:
     arg1: value1
 ```
 
+**注意：**Advisor 的工作目录是 `<home>/nni/experiments/<experiment_id>/log`，可从环境变量 `NNI_LOG_DIRECTORY` 中获取。
+
 ## 示例
 
 参考[示例](https://github.com/microsoft/nni/tree/master/examples/tuners/mnist_keras_customized_advisor)。
\ No newline at end of file
diff --git a/docs/zh_CN/Tuner/CustomizeTuner.md b/docs/zh_CN/Tuner/CustomizeTuner.md
index 2bbd522cea..1588dc2600 100644
--- a/docs/zh_CN/Tuner/CustomizeTuner.md
+++ b/docs/zh_CN/Tuner/CustomizeTuner.md
@@ -10,7 +10,7 @@ NNI 在内置的 Tuner 中提供了最新的调优算法。 NNI 同时也支持
 2. 实现 receive_trial_result 和 generate_parameter 函数
 3. 在 Experiment 的 YAML 文件中配置好自定义的 Tuner
 
-样例如下：
+示例如下：
 
 **1. 继承 Tuner 基类**
 
@@ -76,7 +76,7 @@ parameters = {"dropout": 0.3, "learning_rate": 0.4}
 value = 0.93
 ```
 
-**注意** 如果需要存取自定义的 Tuner 目录里的文件 (如, `data.txt`)，不能使用 `open('data.txt', 'r')`。 要使用：
+**注意：**Tuner 的工作目录是 `<home>/nni/experiments/<experiment_id>/log`，可从环境变量 `NNI_LOG_DIRECTORY` 中获取。 因此，如果要访问自己 Tuner 目录中的文件（如：`data.txt`），不能直接使用 `open('data.txt', 'r')`。 要使用：
 
 ```python
 _pwd = os.path.dirname(__file__)
@@ -101,7 +101,7 @@ tuner:
 
 ```
 
-更多样例，可参考：
+更多示例，可参考：
 
 > - [evolution-tuner](https://github.com/Microsoft/nni/tree/master/src/sdk/pynni/nni/evolution_tuner)
 > - [hyperopt-tuner](https://github.com/Microsoft/nni/tree/master/src/sdk/pynni/nni/hyperopt_tuner)
diff --git a/docs/zh_CN/Tuner/HyperbandAdvisor.md b/docs/zh_CN/Tuner/HyperbandAdvisor.md
index 8fe96752b1..75a96e3a69 100644
--- a/docs/zh_CN/Tuner/HyperbandAdvisor.md
+++ b/docs/zh_CN/Tuner/HyperbandAdvisor.md
@@ -6,7 +6,7 @@
 
 ## 2. 实现并行
 
-首先，此样例是基于 MsgDispatcherBase 来实现的自动机器学习算法，而不是基于 Tuner 和Assessor。 这种实现方法下，Hyperband 集成了 Tuner 和 Assessor 两者的功能，因而将它叫做 Advisor。
+首先，此示例是基于 MsgDispatcherBase 来实现的自动机器学习算法，而不是基于 Tuner 和 Assessor。 这种实现方法下，Hyperband 集成了 Tuner 和 Assessor 两者的功能，因而将它叫做 Advisor。
 
 其次，本实现完全利用了 Hyperband 内部的并行性。 具体来说，下一个分组不会严格的在当前分组结束后再运行，只要有资源，就可以开始运行新的分组。
 
@@ -30,11 +30,11 @@
 
 对于 Trial 代码中 `report_intermediate_result(metric)` 和 `report_final_result(metric)` 的**`指标` 应该是数值，或者用一个 dict，并保证其中有键值为 default 的项目，其值也为数值型**。 这是需要进行最大化或者最小化优化的数值，如精度或者损失度。
 
-`R` 和 `eta` 是 Hyperband 中可以改动的参数。 `R` 表示可以分配给 Trial 的最大资源。 这里，资源可以代表 epoch 或 批处理数量。 `TRIAL_BUDGET` 应该被尝试代码用来控制运行的次数。 参考样例 `examples/trials/mnist-advisor/` ，了解详细信息。
+`R` 和 `eta` 是 Hyperband 中可以改动的参数。 `R` 表示可以分配给 Trial 的最大资源。 这里，资源可以代表 epoch 或批处理数量。 `TRIAL_BUDGET` 应该被 Trial 代码用来控制运行的次数。 参考示例 `examples/trials/mnist-advisor/`，了解详细信息。
 
 `eta` 表示 `n` 个配置中的 `n/eta` 个配置会留存下来，并用更多的资源来运行。
 
-下面是 `R=81` 且 `eta=3` 时的样例：
+下面是 `R=81` 且 `eta=3` 时的示例：
 
 |   | s=4  | s=3  | s=2  | s=1  | s=0  |
 | - | ---- | ---- | ---- | ---- | ---- |
diff --git a/docs/zh_CN/Tuner/NetworkmorphismTuner.md b/docs/zh_CN/Tuner/NetworkmorphismTuner.md
index ac9cf332eb..60d924d6a5 100644
--- a/docs/zh_CN/Tuner/NetworkmorphismTuner.md
+++ b/docs/zh_CN/Tuner/NetworkmorphismTuner.md
@@ -119,11 +119,11 @@ Tuner 有大量的文件、函数和类。 这里只简单介绍最重要的文
 - `layer_transformer.py` 包含了一些层转换，包括变宽，变深，或在层中增加跳跃连接。
 - `nn.py` 包含生成初始化网的类。
 - `metric.py` 包括了一些指标类，如 Accuracy 和 MSE。
-- `utils.py` 是使用 Keras 在数据集 `cifar10` 上搜索神经网络的样例。
+- `utils.py` 是使用 Keras 在数据集 `cifar10` 上搜索神经网络的示例。
 
-## 4. 网络表示的 JSON 样例
+## 4. 网络表示的 JSON 示例
 
-这是定义的中间表示 JSON 样例，在架构搜索过程中会从 Tuner 传到 Trial。 可调用 "json\_to\_graph()" 函数来将 JSON 文件转化为 Pytoch 或 Keras 模型。 样例如下。
+这是定义的中间表示 JSON 示例，在架构搜索过程中会从 Tuner 传到 Trial。 可调用 "json\_to\_graph()" 函数来将 JSON 文件转化为 PyTorch 或 Keras 模型。 示例如下。
 
 ```json
 {
diff --git a/docs/zh_CN/Tutorial/AnnotationSpec.md b/docs/zh_CN/Tutorial/AnnotationSpec.md
index 8f9689cad4..129918b219 100644
--- a/docs/zh_CN/Tutorial/AnnotationSpec.md
+++ b/docs/zh_CN/Tutorial/AnnotationSpec.md
@@ -4,7 +4,7 @@
 
 为了获得良好的用户体验并减少对以后代码的影响，NNI 设计了通过 Annotation（标记）来使用的语法。 通过 Annotation，只需要在代码中加入一些注释字符串，就能启用 NNI，完全不影响代码原先的执行逻辑。
 
-样例如下：
+示例如下：
 
 ```python
 '''@nni.variable(nni.choice(0.1, 0.01, 0.001), name=learning_rate)'''
@@ -12,7 +12,7 @@ learning_rate = 0.1
 
 ```
 
-此样例中，NNI 会从 (0.1, 0.01, 0.001) 中选择一个值赋给 learning_rate 变量。 第一行就是 NNI 的 Annotation，是 Python 中的一个字符串。 接下来的一行需要是赋值语句。 NNI 会根据 Annotation 行的信息，来给这一行的变量赋上相应的值。
+此示例中，NNI 会从 (0.1, 0.01, 0.001) 中选择一个值赋给 learning_rate 变量。 第一行就是 NNI 的 Annotation，是 Python 中的一个字符串。 接下来的一行需要是赋值语句。 NNI 会根据 Annotation 行的信息，来给这一行的变量赋上相应的值。
 
 通过这种方式，不需要修改任何代码，代码既可以直接运行，又可以使用 NNI 来调参。
 
@@ -44,7 +44,7 @@ NNI 支持如下 10 种类型来表示搜索空间：
 - `@nni.variable(nni.lognormal(mu, sigma),name=variable)` 变量值分布的公式为： exp(normal(mu, sigma))
 - `@nni.variable(nni.qlognormal(mu, sigma, q),name=variable)` 变量值分布的公式为： round(exp(normal(mu, sigma)) / q) * q
 
-样例如下：
+示例如下：
 
 ```python
 '''@nni.variable(nni.choice(0.1, 0.01, 0.001), name=learning_rate)'''
diff --git a/docs/zh_CN/Tutorial/ExperimentConfig.md b/docs/zh_CN/Tutorial/ExperimentConfig.md
index 5d441d1877..590f966439 100644
--- a/docs/zh_CN/Tutorial/ExperimentConfig.md
+++ b/docs/zh_CN/Tutorial/ExperimentConfig.md
@@ -69,7 +69,7 @@
       - [password](#password)
       - [token](#token)
       - [host](#host)
-  - [样例](#examples) 
+  - [示例](#examples) 
     - [本机模式](#local-mode)
     - [远程模式](#remote-mode)
     - [PAI 模式](#pai-mode)
diff --git a/docs/zh_CN/Tutorial/Nnictl.md b/docs/zh_CN/Tutorial/Nnictl.md
index e6f319b608..acee5d4534 100644
--- a/docs/zh_CN/Tutorial/Nnictl.md
+++ b/docs/zh_CN/Tutorial/Nnictl.md
@@ -50,7 +50,7 @@ nnictl 支持的命令：
   | --port, -p   | False |     | RESTful 服务的端口          |
   | --debug, -d  | False |     | 设置为调试模式                |
 
-* 样例
+* 示例
   
   > 在默认端口 8080 上创建一个新的 Experiment
   
@@ -98,7 +98,7 @@ nnictl 支持的命令：
   | --port, -p  | False |     | 要恢复的 Experiment 使用的 RESTful 服务端口 |
   | --debug, -d | False |     | 设置为调试模式                          |
 
-* 样例
+* 示例
   
   > 在指定的端口 8088 上恢复 Experiment
   
@@ -241,7 +241,7 @@ nnictl 支持的命令：
   | id          | False |     | 需要设置的 Experiment 的 ID |
   | --value, -v | True  |     | 允许同时运行的 Trial 的数量     |
   
-  * 样例
+  * 示例
     
     > 更新 Experiment 的并发数量
     
@@ -499,7 +499,7 @@ nnictl 支持的命令：
   
   * 详细说明
     
-    NNI 支持导入用户的数据，确保数据格式正确。 样例如下：
+    NNI 支持导入用户的数据，确保数据格式正确。 示例如下：
     
     ```json
     [
diff --git a/docs/zh_CN/Tutorial/QuickStart.md b/docs/zh_CN/Tutorial/QuickStart.md
index 6cc20885e8..3ed05f3e68 100644
--- a/docs/zh_CN/Tutorial/QuickStart.md
+++ b/docs/zh_CN/Tutorial/QuickStart.md
@@ -26,7 +26,7 @@
 
 NNI 是一个能进行自动机器学习实验的工具包。 它可以自动进行获取超参、运行 Trial，测试结果，调优超参的循环。 下面会展示如何使用 NNI 来找到最佳超参组合。
 
-这是还**没有 NNI** 的样例代码，用 CNN 在 MNIST 数据集上训练：
+这是还**没有 NNI** 的示例代码，用 CNN 在 MNIST 数据集上训练：
 
 ```python
 def run_trial(params):
diff --git a/docs/zh_CN/Tutorial/SearchSpaceSpec.md b/docs/zh_CN/Tutorial/SearchSpaceSpec.md
index 961e555ecd..2b64721e1d 100644
--- a/docs/zh_CN/Tutorial/SearchSpaceSpec.md
+++ b/docs/zh_CN/Tutorial/SearchSpaceSpec.md
@@ -6,7 +6,7 @@
 
 要定义搜索空间，需要定义变量名称、采样策略的类型及其参数。
 
-* 搜索空间样例如下：
+* 搜索空间示例如下：
 
 ```yaml
 {
@@ -19,7 +19,7 @@
 
 ```
 
-将第一行作为样例。 `dropout_rate` 定义了一个变量，先验分布为均匀分布，范围从 `0.1` 到 `0.5`。
+将第一行作为示例。 `dropout_rate` 定义了一个变量，先验分布为均匀分布，范围从 `0.1` 到 `0.5`。
 
 注意，搜索空间的效果与 Tuner 高度相关。 此处列出了内置 Tuner 所支持的类型。 对于自定义的 Tuner，不必遵循鞋标，可使用任何的类型。
 
diff --git a/docs/zh_CN/conf.py b/docs/zh_CN/conf.py
index 9db2b8a62d..f1336f1c78 100644
--- a/docs/zh_CN/conf.py
+++ b/docs/zh_CN/conf.py
@@ -12,23 +12,23 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
+from recommonmark.transform import AutoStructify
+from recommonmark.parser import CommonMarkParser
 import os
 import sys
 sys.path.insert(0, os.path.abspath('../../src/sdk/pynni'))
 
-from recommonmark.parser import CommonMarkParser
-from recommonmark.transform import AutoStructify
 
 # -- Project information ---------------------------------------------------
 
-project = 'Neural Network Intelligence'
-copyright = '2019, Microsoft'
+project = 'NNI'
+copyright = '2020, Microsoft'
 author = 'Microsoft'
 
 # The short X.Y version
 version = ''
 # The full version, including alpha/beta/rc tags
-release = 'v1.2'
+release = 'v1.3'
 
 # -- General configuration ---------------------------------------------------
 
@@ -109,6 +109,8 @@
 # html_sidebars = {}
 
 html_logo = '../img/nni_logo_dark.png'
+html_title = '支持神经网络结构搜索、模型压缩、超参调优的开源自动机器学习工具 (%s %s)' % \
+    (project, release)
 
 # -- Options for HTMLHelp output ---------------------------------------------
 
@@ -190,6 +192,6 @@ def setup(app):
     app.add_config_value('recommonmark_config', {
         'enable_eval_rst': True,
         'enable_auto_toc_tree': False,
-            }, True)
+    }, True)
     app.add_transform(AutoStructify)
     app.add_stylesheet('css/custom.css')
diff --git a/docs/zh_CN/examples.rst b/docs/zh_CN/examples.rst
index e27d9d9c19..f76ce9eb3d 100644
--- a/docs/zh_CN/examples.rst
+++ b/docs/zh_CN/examples.rst
@@ -1,5 +1,5 @@
 ######################
-样例
+示例
 ######################
 
 ..  toctree::
diff --git a/docs/zh_CN/index.rst b/docs/zh_CN/index.rst
index 65e2314a77..775cbb5f08 100644
--- a/docs/zh_CN/index.rst
+++ b/docs/zh_CN/index.rst
@@ -1,6 +1,6 @@
-#########################################
-Neural Network Intelligence（NNI）文档
-#########################################
+###########################
+Neural Network Intelligence
+###########################
 
 ********
 内容
diff --git a/examples/assessors/README_zh_CN.md b/examples/assessors/README_zh_CN.md
index 02ffb3c51b..5e0d72316a 100644
--- a/examples/assessors/README_zh_CN.md
+++ b/examples/assessors/README_zh_CN.md
@@ -56,6 +56,6 @@ main()
 
 也可以重载 Assessor 的 `run` 函数来控制过程逻辑。
 
-更多样例，可参考：
+更多示例，可参考：
 
 > - [Base-Assessor](https://msrasrg.visualstudio.com/NeuralNetworkIntelligenceOpenSource/_git/Default?_a=contents&path=%2Fsrc%2Fsdk%2Fpynni%2Fnni%2Fassessor.py&version=GBadd_readme)
\ No newline at end of file
diff --git a/examples/nas/darts/README_zh_CN.md b/examples/nas/darts/README_zh_CN.md
new file mode 100644
index 0000000000..3c50963083
--- /dev/null
+++ b/examples/nas/darts/README_zh_CN.md
@@ -0,0 +1 @@
+[文档](https://nni.readthedocs.io/zh/latest/NAS/DARTS.html)
diff --git a/examples/nas/enas/README_zh_CN.md b/examples/nas/enas/README_zh_CN.md
new file mode 100644
index 0000000000..03919b0f68
--- /dev/null
+++ b/examples/nas/enas/README_zh_CN.md
@@ -0,0 +1 @@
+[文档](https://nni.readthedocs.io/zh/latest/NAS/ENAS.html)
diff --git a/examples/nas/naive/README_zh_CN.md b/examples/nas/naive/README_zh_CN.md
new file mode 100644
index 0000000000..86c93d519d
--- /dev/null
+++ b/examples/nas/naive/README_zh_CN.md
@@ -0,0 +1 @@
+这是一个简单示例，演示如何使用 NNI 接口实现 NAS 搜索空间。
\ No newline at end of file
diff --git a/examples/nas/pdarts/README_zh_CN.md b/examples/nas/pdarts/README_zh_CN.md
new file mode 100644
index 0000000000..e43f5c20eb
--- /dev/null
+++ b/examples/nas/pdarts/README_zh_CN.md
@@ -0,0 +1 @@
+[文档](https://nni.readthedocs.io/zh/latest/NAS/PDARTS.html)
diff --git a/examples/nas/spos/README_zh_CN.md b/examples/nas/spos/README_zh_CN.md
new file mode 100644
index 0000000000..ddcfa20970
--- /dev/null
+++ b/examples/nas/spos/README_zh_CN.md
@@ -0,0 +1 @@
+[文档](https://nni.readthedocs.io/zh/latest/NAS/SPOS.html)
diff --git a/examples/nas/textnas/README_zh_CN.md b/examples/nas/textnas/README_zh_CN.md
new file mode 100644
index 0000000000..22bbbb4c9b
--- /dev/null
+++ b/examples/nas/textnas/README_zh_CN.md
@@ -0,0 +1,45 @@
+# TextNAS: A Neural Architecture Search Space tailored for Text Representation
+
+TextNAS 由 MSRA 提出。 这是官方发布版本。
+
+[论文链接](https://arxiv.org/abs/1912.10729)
+
+## 准备
+
+准备词向量和 SST 数据集，并按如下结构放到 data 目录中：
+
+```
+textnas
+├── data
+│   ├── sst
+│   │   └── trees
+│   │       ├── dev.txt
+│   │       ├── test.txt
+│   │       └── train.txt
+│   └── glove.840B.300d.txt
+├── dataloader.py
+├── model.py
+├── ops.py
+├── README.md
+├── search.py
+└── utils.py
+```
+
+以下链接有助于查找和下载相应的数据集：
+
+* [GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/projects/glove/)
+* [Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank](https://nlp.stanford.edu/sentiment/)
+
+## 搜索
+
+```
+python search.py
+```
+
+在每个搜索 Epoch 后，会直接测试 10 个采样的结构。 10 个 Epoch 后的性能预计为 40% - 42%。
+
+默认情况下，20 个采样结构会被导出到 `checkpoints` 目录中，以便进行下一步处理。
+
+## 重新训练
+
+待完成
diff --git a/examples/trials/README_zh_CN.md b/examples/trials/README_zh_CN.md
index f854066fa1..f28a3d2c4a 100644
--- a/examples/trials/README_zh_CN.md
+++ b/examples/trials/README_zh_CN.md
@@ -6,7 +6,7 @@
 
 **1) 写好原始的训练代码**。
 
-Trial 的代码可以是任何能在本机运行的机器学习代码。 这里使用 `mnist-keras. py` 作为样例：
+Trial 的代码可以是任何能在本机运行的机器学习代码。 这里使用 `mnist-keras.py` 作为示例：
 
 ```python
 import argparse
@@ -163,7 +163,7 @@ def train(args, params):
 ...
 ```
 
-这是完整的样例：
+这是完整示例：
 
 ```python
 import argparse
diff --git a/examples/trials/cifar10_pytorch/README_zh_CN.md b/examples/trials/cifar10_pytorch/README_zh_CN.md
index 00c0d4bae4..821f0dd51e 100644
--- a/examples/trials/cifar10_pytorch/README_zh_CN.md
+++ b/examples/trials/cifar10_pytorch/README_zh_CN.md
@@ -1,3 +1,3 @@
-此样例需要安装 Pytorch。 Pytorch 安装包需要选择所基于的 Python 和 CUDA 版本。
+此示例需要安装 Pytorch。 Pytorch 安装包需要选择所基于的 Python 和 CUDA 版本。
 
-以下是 python==3.5 和 cuda == 8.0 下的环境样例，使用下列命令来安装 Pytorch： python3 -m pip install http://download.pytorch.org/whl/cu80/torch-0.4.1-cp35-cp35m-linux_x86_64.whl python3 -m pip install torchvision
\ No newline at end of file
+以下是 python==3.5 和 cuda == 8.0 下的环境示例，使用下列命令来安装 Pytorch： python3 -m pip install http://download.pytorch.org/whl/cu80/torch-0.4.1-cp35-cp35m-linux_x86_64.whl python3 -m pip install torchvision
\ No newline at end of file
diff --git a/examples/trials/ga_squad/README_zh_CN.md b/examples/trials/ga_squad/README_zh_CN.md
index 7a313c931c..325ed83e35 100644
--- a/examples/trials/ga_squad/README_zh_CN.md
+++ b/examples/trials/ga_squad/README_zh_CN.md
@@ -1,6 +1,6 @@
 # 在阅读理解上使用自动模型架构搜索
 
-该样例展示了如何使用遗传算法为阅读理解任务找到好的模型架构。
+该示例展示了如何使用遗传算法为阅读理解任务找到好的模型架构。
 
 ## 搜索空间
 
@@ -20,9 +20,9 @@
 
 另一个时间更快，性能更好的版本正在开发中。 很快将发布。
 
-# 如何运行此样例？
+# 如何运行此示例？
 
-## 在本机或远程上运行此样例
+## 在本机或远程上运行此示例
 
 ### 使用下载脚本来下载数据
 
@@ -82,7 +82,7 @@
     nnictl create --config ~/nni/examples/trials/ga_squad/config.yml
     
 
-## 在 OpenPAI 上运行此样例
+## 在 OpenPAI 上运行此示例
 
 根据上传大小的限制，仅上传源代码，并在训练过程中下载数据。 本 Experiment 需要的内存 `memoryMB >= 32G`，训练过程可能需要数小时。
 
@@ -139,7 +139,7 @@
 
 ## 实现方法
 
-基于进化算法架构的问答和其它样例一样，有两个部分：Trial 和 Tuner。
+基于进化算法架构的问答和其它示例一样，有两个部分：Trial 和 Tuner。
 
 ### Trial
 
@@ -245,7 +245,7 @@ Tuner 比 Trial 代码简单很多。 它们共用了同样的 `graph.py`。 此
 
 ## 模型配置格式
 
-这是模型配置的样例，在架构搜索过程中，从 Tuner 传入 Trial 的代码。
+这是模型配置的示例，在架构搜索过程中，从 Tuner 传入 Trial 的代码。
 
     {
         "max_layer_num": 50,
diff --git a/examples/trials/kaggle-tgs-salt/README_zh_CN.md b/examples/trials/kaggle-tgs-salt/README_zh_CN.md
index 91e8f56b84..fd68bbaa81 100644
--- a/examples/trials/kaggle-tgs-salt/README_zh_CN.md
+++ b/examples/trials/kaggle-tgs-salt/README_zh_CN.md
@@ -1,6 +1,6 @@
 ## Kaggle 比赛 [TGS Salt Identification Chanllenge](https://www.kaggle.com/c/tgs-salt-identification-challenge) 第 33 名的解决方案
 
-此样例展示了如何在没有任何代码改动的情况下通过 NNI 来为竞赛代码使用自动机器学习。 要在 NNI 上运行此代码，首先需要单独运行它，然后配置 config.yml：
+本示例展示了如何在不改动代码的情况下通过 NNI 来为竞赛代码使用自动机器学习。 要在 NNI 上运行此代码，首先需要单独运行它，然后配置 config.yml：
 
     nnictl create --config config.yml
     
diff --git a/examples/trials/network_morphism/README_zh_CN.md b/examples/trials/network_morphism/README_zh_CN.md
index 6803fd41b9..5ad0db080a 100644
--- a/examples/trials/network_morphism/README_zh_CN.md
+++ b/examples/trials/network_morphism/README_zh_CN.md
@@ -1,8 +1,8 @@
 # 在 NNI 中用网络形态算法来进行自动模型结构搜索
 
-Network Morphism （网络形态）是内置的 Tuner，它使用了网络形态技术来搜索和评估新的网络结构。 该样例展示了如何使用它来为深度学习找到好的模型架构。
+Network Morphism （网络形态）是内置的 Tuner，它使用了网络形态技术来搜索和评估新的网络结构。 该示例展示了如何使用它来为深度学习找到好的模型架构。
 
-## 如何运行此样例？
+## 如何运行此示例？
 
 ### 1. 训练框架支持
 
@@ -58,7 +58,7 @@ trial:
 
 ### 4. 在代码中调用 "json\_to\_graph()" 函数
 
-修改代码来调用 "json\_to\_graph()" 函数来从收到的 JSON 字符串生成一个 Pytorch 或 Keras 模型。 简单样例：
+修改代码来调用 "json\_to\_graph()" 函数来从收到的 JSON 字符串生成一个 Pytorch 或 Keras 模型。 简单示例：
 
 ```python
 import nni
@@ -91,18 +91,18 @@ nni.report_final_result(best_acc)
 nnictl create --config config.yml
 ```
 
-## Trial 样例
+## Trial 示例
 
 下面的代码可在 `examples/trials/network_morphism/` 中找到。 可参考此代码来更新自己的任务。 希望它对你有用。
 
 ### FashionMNIST
 
-`Fashion-MNIST` 是来自 [Zalando](https://jobs.zalando.com/tech/) 文章的图片 — 有 60,000 个样例的训练集和 10,000 个样例的测试集。 每个样例是 28x28 的灰度图，分为 10 个类别。 由于 MNIST 数据集过于简单，该数据集现在开始被广泛使用，用来替换 MNIST 作为基准数据集。
+`Fashion-MNIST` 是来自 [Zalando](https://jobs.zalando.com/tech/) 商品图片的数据集 — 包含 60,000 个样本的训练集和 10,000 个样本的测试集。 每个样本是 28x28 的灰度图，分为 10 个类别。 由于 MNIST 数据集过于简单，该数据集现在开始被广泛使用，用来替换 MNIST 作为基准数据集。
 
-这里有两个样例，[FashionMNIST-keras.py](./FashionMNIST/FashionMNIST_keras.py) 和 [FashionMNIST-pytorch.py](./FashionMNIST/FashionMNIST_pytorch.py)。 注意，在 `config.yml` 中，需要为此数据集修改 `input_width` 为 28，以及 `input_channel` 为 1。
+这里有两个示例，[FashionMNIST-keras.py](./FashionMNIST/FashionMNIST_keras.py) 和 [FashionMNIST-pytorch.py](./FashionMNIST/FashionMNIST_pytorch.py)。 注意，在 `config.yml` 中，需要为此数据集修改 `input_width` 为 28，以及 `input_channel` 为 1。
 
 ### Cifar10
 
 `CIFAR-10` 数据集 [Canadian Institute For Advanced Research](https://www.cifar.ca/) 是广泛用于机器学习和视觉算法训练的数据集。 它是机器学习领域最广泛使用的数据集之一。 CIFAR-10 数据集包含了 60,000 张 32x32 的彩色图片，分为 10 类。
 
-这里有两个样例，[cifar10-keras.py](./cifar10/cifar10_keras.py) 和 [cifar10-pytorch.py](./cifar10/cifar10_pytorch.py)。 在 `config.yml` 中，该数据集 `input_width` 的值是 32，并且 `input_channel` 是 3。
\ No newline at end of file
+这里有两个示例，[cifar10-keras.py](./cifar10/cifar10_keras.py) 和 [cifar10-pytorch.py](./cifar10/cifar10_pytorch.py)。 在 `config.yml` 中，该数据集 `input_width` 的值是 32，并且 `input_channel` 是 3。
\ No newline at end of file
diff --git a/tools/nni_annotation/README_zh_CN.md b/tools/nni_annotation/README_zh_CN.md
index 17076724c0..f8a4238709 100644
--- a/tools/nni_annotation/README_zh_CN.md
+++ b/tools/nni_annotation/README_zh_CN.md
@@ -4,14 +4,14 @@
 
 为了获得良好的用户体验并减少对以后代码的影响，NNI 设计了通过 Annotation（标记）来使用的语法。 通过 Annotation，只需要在代码中加入一些注释字符串，就能启用 NNI，完全不影响代码原先的执行逻辑。
 
-样例如下：
+示例如下：
 
 ```python
 '''@nni.variable(nni.choice(0.1, 0.01, 0.001), name=learning_rate)'''
 learning_rate = 0.1
 ```
 
-此样例中，NNI 会从 (0.1, 0.01, 0.001) 中选择一个值赋给 learning_rate 变量。 第一行就是 NNI 的 Annotation，是 Python 中的一个字符串。 接下来的一行需要是赋值语句。 NNI 会根据 Annotation 行的信息，来给这一行的变量赋上相应的值。
+此示例中，NNI 会从 (0.1, 0.01, 0.001) 中选择一个值赋给 learning_rate 变量。 第一行就是 NNI 的 Annotation，是 Python 中的一个字符串。 接下来的一行需要是赋值语句。 NNI 会根据 Annotation 行的信息，来给这一行的变量赋上相应的值。
 
 通过这种方式，不需要修改任何代码，代码既可以直接运行，又可以使用 NNI 来调参。
 
@@ -43,7 +43,7 @@ NNI 支持如下 10 种类型来表示搜索空间：
 - `@nni.variable(nni.lognormal(mu, sigma),name=variable)` 变量值分布的公式为： exp(normal(mu, sigma))
 - `@nni.variable(nni.qlognormal(mu, sigma, q),name=variable)` 变量值分布的公式为： round(exp(normal(mu, sigma)) / q) * q
 
-样例如下：
+示例如下：
 
 ```python
 '''@nni.variable(nni.choice(0.1, 0.01, 0.001), name=learning_rate)'''

From 4ed78edd55c0a0a59a636b3d531cac0e0508b828 Mon Sep 17 00:00:00 2001
From: anttisaukko <antti.saukko@gmail.com>
Date: Mon, 6 Jan 2020 23:13:03 -0800
Subject: [PATCH 21/23] Fix kernel typos (#1921)

---
 docs/en_US/TrialExample/SklearnExamples.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/en_US/TrialExample/SklearnExamples.md b/docs/en_US/TrialExample/SklearnExamples.md
index 4790123a15..0c481ee2ac 100644
--- a/docs/en_US/TrialExample/SklearnExamples.md
+++ b/docs/en_US/TrialExample/SklearnExamples.md
@@ -20,7 +20,7 @@ nnictl create --config ./config.yml
 
 This example uses the dataset of digits, which is made up of 1797 8x8 images, and each image is a hand-written digit, the goal is to classify these images into 10 classes.
 
-In this example, we use SVC as the model, and choose some parameters of this model, including `"C", "keral", "degree", "gamma" and "coef0"`. For more information of these parameters, please [refer](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html).
+In this example, we use SVC as the model, and choose some parameters of this model, including `"C", "kernel", "degree", "gamma" and "coef0"`. For more information of these parameters, please [refer](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html).
 
 ### 2.2 regression
 
@@ -64,7 +64,7 @@ It is easy to use NNI in your scikit-learn code, there are only a few steps.
   ```json
   {
     "C": {"_type":"uniform","_value":[0.1, 1]},
-    "keral": {"_type":"choice","_value":["linear", "rbf", "poly", "sigmoid"]},
+    "kernel": {"_type":"choice","_value":["linear", "rbf", "poly", "sigmoid"]},
     "degree": {"_type":"choice","_value":[1, 2, 3, 4]},
     "gamma": {"_type":"uniform","_value":[0.01, 0.1]},
     "coef0 ": {"_type":"uniform","_value":[0.01, 0.1]}
@@ -76,7 +76,7 @@ It is easy to use NNI in your scikit-learn code, there are only a few steps.
   ```python
   params = {
         'C': 1.0,
-        'keral': 'linear',
+        'kernel': 'linear',
         'degree': 3,
         'gamma': 0.01,
         'coef0': 0.01

From bf2b92909c70c364369bbcf1a77db442f571d34e Mon Sep 17 00:00:00 2001
From: SparkSnail <shinyang@microsoft.com>
Date: Wed, 8 Jan 2020 14:58:52 +0800
Subject: [PATCH 22/23] Support monitor mode when creating or resuming a new
 experiment (#1933)

---
 docs/en_US/Tutorial/Nnictl.md |  2 ++
 tools/bash-completion         |  4 ++--
 tools/nni_cmd/launcher.py     |  6 ++++--
 tools/nni_cmd/nnictl.py       |  2 ++
 tools/nni_cmd/nnictl_utils.py | 36 ++++++++++++++++++++++++++++-------
 5 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/docs/en_US/Tutorial/Nnictl.md b/docs/en_US/Tutorial/Nnictl.md
index 83c81f7145..b58d4c4a37 100644
--- a/docs/en_US/Tutorial/Nnictl.md
+++ b/docs/en_US/Tutorial/Nnictl.md
@@ -49,6 +49,7 @@ nnictl support commands:
   |--config, -c|  True| |YAML configure file of the experiment|
   |--port, -p|False| |the port of restful server|
   |--debug, -d|False||set debug mode|
+  |--watch, -w|False||set watch mode|
 
 * Examples
 
@@ -97,6 +98,7 @@ Debug mode will disable version check function in Trialkeeper.
   |id|  True| |The id of the experiment you want to resume|
   |--port, -p|  False| |Rest port of the experiment you want to resume|
   |--debug, -d|False||set debug mode|
+  |--watch, -w|False||set watch mode|
 
 * Example
 
diff --git a/tools/bash-completion b/tools/bash-completion
index 86283d6ec7..031f9b3476 100644
--- a/tools/bash-completion
+++ b/tools/bash-completion
@@ -1,7 +1,7 @@
 # list of commands/arguments
 __nnictl_cmds="create resume view update stop trial experiment platform import export webui config log package tensorboard top"
-__nnictl_create_cmds="--config --port --debug"
-__nnictl_resume_cmds="--port --debug"
+__nnictl_create_cmds="--config --port --debug --watch"
+__nnictl_resume_cmds="--port --debug --watch"
 __nnictl_view_cmds="--port"
 __nnictl_update_cmds="searchspace concurrency duration trialnum"
 __nnictl_update_searchspace_cmds="--filename"
diff --git a/tools/nni_cmd/launcher.py b/tools/nni_cmd/launcher.py
index 54ce4aec77..5d406a0ae3 100644
--- a/tools/nni_cmd/launcher.py
+++ b/tools/nni_cmd/launcher.py
@@ -20,7 +20,7 @@
                           detect_port, get_user, get_python_dir
 from .constants import NNICTL_HOME_DIR, ERROR_INFO, REST_TIME_OUT, EXPERIMENT_SUCCESS_INFO, LOG_HEADER, PACKAGE_REQUIREMENTS
 from .command_utils import check_output_command, kill_command
-from .nnictl_utils import update_experiment
+from .nnictl_utils import update_experiment, set_monitor
 
 def get_log_path(config_file_name):
     '''generate stdout and stderr log path'''
@@ -493,6 +493,8 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
                                             experiment_config['experimentName'])
 
     print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, '   '.join(web_ui_url_list)))
+    if args.watch:
+        set_monitor(True, 3, args.port, rest_process.pid)
 
 def create_experiment(args):
     '''start a new experiment'''
@@ -506,8 +508,8 @@ def create_experiment(args):
     validate_all_content(experiment_config, config_path)
 
     nni_config.set_config('experimentConfig', experiment_config)
-    launch_experiment(args, experiment_config, 'new', config_file_name)
     nni_config.set_config('restServerPort', args.port)
+    launch_experiment(args, experiment_config, 'new', config_file_name)
 
 def manage_stopped_experiment(args, mode):
     '''view a stopped experiment'''
diff --git a/tools/nni_cmd/nnictl.py b/tools/nni_cmd/nnictl.py
index ab8e4153ca..856bd2adc8 100644
--- a/tools/nni_cmd/nnictl.py
+++ b/tools/nni_cmd/nnictl.py
@@ -51,6 +51,7 @@ def parse_args():
     parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
     parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
     parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
+    parser_start.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
     parser_start.set_defaults(func=create_experiment)
 
     # parse resume command
@@ -58,6 +59,7 @@ def parse_args():
     parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
     parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
     parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
+    parser_resume.add_argument('--watch', '-w', action='store_true', help=' set watch mode')
     parser_resume.set_defaults(func=resume_experiment)
 
     # parse view command
diff --git a/tools/nni_cmd/nnictl_utils.py b/tools/nni_cmd/nnictl_utils.py
index 3fad9c2599..a66197fac9 100644
--- a/tools/nni_cmd/nnictl_utils.py
+++ b/tools/nni_cmd/nnictl_utils.py
@@ -3,6 +3,7 @@
 
 import csv
 import os
+import sys
 import json
 import time
 import re
@@ -623,23 +624,44 @@ def show_experiment_info():
                           content[index].get('endTime'), content[index].get('status')))
         print(TRIAL_MONITOR_TAIL)
 
-def monitor_experiment(args):
-    '''monitor the experiment'''
-    if args.time <= 0:
-        print_error('please input a positive integer as time interval, the unit is second.')
-        exit(1)
+def set_monitor(auto_exit, time_interval, port=None, pid=None):
+    '''set the experiment monitor engine'''
     while True:
         try:
-            os.system('clear')
+            if sys.platform == 'win32':
+                os.system('cls')
+            else:
+                os.system('clear')
             update_experiment()
             show_experiment_info()
-            time.sleep(args.time)
+            if auto_exit:
+                status = get_experiment_status(port)
+                if status in ['DONE', 'ERROR', 'STOPPED']:
+                    print_normal('Experiment status is {0}.'.format(status))
+                    print_normal('Stopping experiment...')
+                    kill_command(pid)
+                    print_normal('Stop experiment success.')
+                    exit(0)
+            time.sleep(time_interval)
         except KeyboardInterrupt:
+            if auto_exit:
+                print_normal('Stopping experiment...')
+                kill_command(pid)
+                print_normal('Stop experiment success.')
+            else:
+                print_normal('Exiting...')
             exit(0)
         except Exception as exception:
             print_error(exception)
             exit(1)
 
+def monitor_experiment(args):
+    '''monitor the experiment'''
+    if args.time <= 0:
+        print_error('please input a positive integer as time interval, the unit is second.')
+        exit(1)
+    set_monitor(False, args.time)
+
 def export_trials_data(args):
     '''export experiment metadata to csv
     '''

From 3e39c96a46ffa4a1c867e37b92de55c4f5fb8113 Mon Sep 17 00:00:00 2001
From: Yan Ni <yann@microsoft.com>
Date: Thu, 9 Jan 2020 13:24:01 +0800
Subject: [PATCH 23/23] Add test for documentation build (#1924)

---
 azure-pipelines.yml                       | 5 +++++
 docs/en_US/Compressor/Pruner.md           | 2 --
 docs/en_US/Compressor/Quantizer.md        | 6 ++----
 docs/en_US/Release.md                     | 4 ++--
 docs/en_US/TrainingService/PaiYarnMode.md | 2 +-
 docs/en_US/conf.py                        | 2 +-
 docs/en_US/examples.rst                   | 1 +
 docs/en_US/model_compression.rst          | 2 +-
 docs/en_US/training_services.rst          | 1 +
 src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py  | 6 +++++-
 10 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index a0939c8bce..3f4238e413 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -31,6 +31,7 @@ jobs:
       python3 -m pip install tensorflow==1.13.1 --user
       python3 -m pip install keras==2.1.6 --user
       python3 -m pip install gym onnx --user
+      python3 -m pip install sphinx==1.8.3 sphinx-argparse==0.2.5 sphinx-markdown-tables==0.0.9 sphinx-rtd-theme==0.4.2 sphinxcontrib-websupport==1.1.0 recommonmark==0.5.0 --user
       sudo apt-get install swig -y
       nnictl package install --name=SMAC
       nnictl package install --name=BOHB
@@ -69,6 +70,10 @@ jobs:
       cd test
       python3 cli_test.py
     displayName: 'nnicli test'
+  - script: |
+      cd docs/en_US/
+      sphinx-build -M html . _build -W
+    displayName: 'Sphinx Documentation Build check'
 
 - job: 'basic_test_pr_macOS'
   pool:
diff --git a/docs/en_US/Compressor/Pruner.md b/docs/en_US/Compressor/Pruner.md
index 4b5fdf16af..cb31b99b10 100644
--- a/docs/en_US/Compressor/Pruner.md
+++ b/docs/en_US/Compressor/Pruner.md
@@ -342,5 +342,3 @@ You can view example for more information
 
 - **sparsity:** How much percentage of convolutional filters are to be pruned.
 - **op_types:** Only Conv2d is supported in ActivationMeanRankFilterPruner
-
-***
\ No newline at end of file
diff --git a/docs/en_US/Compressor/Quantizer.md b/docs/en_US/Compressor/Quantizer.md
index 3308f25c1b..574926c7ad 100644
--- a/docs/en_US/Compressor/Quantizer.md
+++ b/docs/en_US/Compressor/Quantizer.md
@@ -5,11 +5,9 @@ Quantizer on NNI Compressor
 We provide Naive Quantizer to quantizer weight to default 8 bits, you can use it to test quantize algorithm without any configure.
 
 ### Usage
-tensorflow
-```python nni.compression.tensorflow.NaiveQuantizer(model_graph).compress()
-```
 pytorch
-```python nni.compression.torch.NaiveQuantizer(model).compress()
+```python 
+model = nni.compression.torch.NaiveQuantizer(model).compress()
 ```
 
 ***
diff --git a/docs/en_US/Release.md b/docs/en_US/Release.md
index 02e6604767..b2d57e8b00 100644
--- a/docs/en_US/Release.md
+++ b/docs/en_US/Release.md
@@ -186,7 +186,7 @@
   * Run trial jobs on the GPU running non-NNI jobs
 * Kubeflow v1beta2 operator
   * Support Kubeflow TFJob/PyTorchJob v1beta2
-* [General NAS programming interface](AdvancedFeature/GeneralNasInterfaces.md)
+* [General NAS programming interface](https://github.com/microsoft/nni/blob/v0.8/docs/en_US/GeneralNasInterfaces.md)
   * Provide NAS programming interface for users to easily express their neural architecture search space through NNI annotation
   * Provide a new command `nnictl trial codegen` for debugging the NAS code
   * Tutorial of NAS programming interface, example of NAS on MNIST, customized random tuner for NAS
@@ -299,7 +299,7 @@
 * Support [Metis tuner](Tuner/MetisTuner.md) as a new NNI tuner. Metis algorithm has been proofed to be well performed for **online** hyper-parameter tuning.
 * Support [ENAS customized tuner](https://github.com/countif/enas_nni), a tuner contributed by github community user, is an algorithm for neural network search, it could learn neural network architecture via reinforcement learning and serve a better performance than NAS.
 * Support [Curve fitting assessor](Assessor/CurvefittingAssessor.md) for early stop policy using learning curve extrapolation.
-* Advanced Support of [Weight Sharing](AdvancedFeature/AdvancedNas.md): Enable weight sharing for NAS tuners, currently through NFS.
+* Advanced Support of [Weight Sharing](https://github.com/microsoft/nni/blob/v0.5/docs/AdvancedNAS.md): Enable weight sharing for NAS tuners, currently through NFS.
 
 #### Training Service Enhancement
 
diff --git a/docs/en_US/TrainingService/PaiYarnMode.md b/docs/en_US/TrainingService/PaiYarnMode.md
index 078ac1dc02..eb2864f94c 100644
--- a/docs/en_US/TrainingService/PaiYarnMode.md
+++ b/docs/en_US/TrainingService/PaiYarnMode.md
@@ -106,7 +106,7 @@ nnictl create --config exp_paiYarn.yml
 ```
 to start the experiment in paiYarn mode. NNI will create OpenpaiYarn job for each trial, and the job name format is something like `nni_exp_{experiment_id}_trial_{trial_id}`.
 You can see jobs created by NNI in the OpenpaiYarn cluster's web portal, like:
-![](../../img/nni_paiYarn_joblist.jpg)
+![](../../img/nni_pai_joblist.jpg)
 
 Notice: In paiYarn mode, NNIManager will start a rest server and listen on a port which is your NNI WebUI's port plus 1. For example, if your WebUI port is `8080`, the rest server will listen on `8081`, to receive metrics from trial job running in Kubernetes. So you should `enable 8081` TCP port in your firewall rule to allow incoming traffic.
 
diff --git a/docs/en_US/conf.py b/docs/en_US/conf.py
index 73286ad0a1..60b2afe782 100644
--- a/docs/en_US/conf.py
+++ b/docs/en_US/conf.py
@@ -72,7 +72,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'Release_v1.0.md']
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = None
diff --git a/docs/en_US/examples.rst b/docs/en_US/examples.rst
index 57a1547610..d260e648ce 100644
--- a/docs/en_US/examples.rst
+++ b/docs/en_US/examples.rst
@@ -11,3 +11,4 @@ Examples
     EvolutionSQuAD<./TrialExample/SquadEvolutionExamples>
     GBDT<./TrialExample/GbdtExample>
     RocksDB <./TrialExample/RocksdbExamples>
+    KDExample <./TrialExample/KDExample>
diff --git a/docs/en_US/model_compression.rst b/docs/en_US/model_compression.rst
index 9ead0837a6..36cff91193 100644
--- a/docs/en_US/model_compression.rst
+++ b/docs/en_US/model_compression.rst
@@ -18,7 +18,7 @@ For details, please refer to the following tutorials:
     Overview <Compressor/Overview>
     Level Pruner <Compressor/Pruner>
     AGP Pruner <Compressor/Pruner>
-    L1Filter Pruner <Compressor/L1FilterPruner>
+    L1Filter Pruner <Compressor/l1filterpruner>
     Slim Pruner <Compressor/SlimPruner>
     Lottery Ticket Pruner <Compressor/LotteryTicketHypothesis>
     FPGM Pruner <Compressor/Pruner>
diff --git a/docs/en_US/training_services.rst b/docs/en_US/training_services.rst
index bfc8d9a746..b5b520f4ed 100644
--- a/docs/en_US/training_services.rst
+++ b/docs/en_US/training_services.rst
@@ -6,5 +6,6 @@ Introduction to NNI Training Services
     Local<./TrainingService/LocalMode>
     Remote<./TrainingService/RemoteMachineMode>
     OpenPAI<./TrainingService/PaiMode>
+    OpenPAI Yarn Mode<./TrainingService/PaiYarnMode>
     Kubeflow<./TrainingService/KubeflowMode>
     FrameworkController<./TrainingService/FrameworkControllerMode>
diff --git a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
index 452a17a41e..80166370f7 100644
--- a/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
+++ b/src/sdk/pynni/nni/ppo_tuner/ppo_tuner.py
@@ -503,8 +503,11 @@ def generate_parameters(self, parameter_id, **kwargs):
         """
         Generate parameters, if no trial configration for now, self.credit plus 1 to send the config later
 
+        Parameters
+        ----------
         parameter_id : int
-            Unique identifier for requested hyper-parameters. This will later be used in :meth:`receive_trial_result`.
+            Unique identifier for requested hyper-parameters.
+            This will later be used in :meth:`receive_trial_result`.
         **kwargs
             Not used
 
@@ -512,6 +515,7 @@ def generate_parameters(self, parameter_id, **kwargs):
         -------
         dict
             One newly generated configuration
+
         """
         if self.first_inf:
             self.trials_result = [None for _ in range(self.inf_batch_size)]