diff --git a/python/dgl/__init__.py b/python/dgl/__init__.py index 78d2d9c6de77..1dec84b54da1 100644 --- a/python/dgl/__init__.py +++ b/python/dgl/__init__.py @@ -1,5 +1,4 @@ -# One has to manually import dgl.data; fixes #125 -#from . import data +"""DGL root package.""" from . import function from . import nn from . import contrib @@ -12,7 +11,6 @@ from .backend import load_backend from .batched_graph import * from .graph import DGLGraph -from .subgraph import DGLSubGraph from .traversal import * from .propagate import * from .udf import NodeBatch, EdgeBatch diff --git a/python/dgl/_api_internal.py b/python/dgl/_api_internal.py index e69de29bb2d1..af08960ca6e8 100644 --- a/python/dgl/_api_internal.py +++ b/python/dgl/_api_internal.py @@ -0,0 +1 @@ +"""Namespace for internal apis.""" diff --git a/python/dgl/_ffi/base.py b/python/dgl/_ffi/base.py index 0c2f868ca9c9..623bd554036d 100644 --- a/python/dgl/_ffi/base.py +++ b/python/dgl/_ffi/base.py @@ -26,8 +26,7 @@ class DGLError(Exception): """Error thrown by DGL function""" - pass - + pass # pylint: disable=unnecessary-pass def _load_lib(): """Load libary by searching possible path.""" diff --git a/python/dgl/_ffi/function.py b/python/dgl/_ffi/function.py index 6b70bc799d1a..af89a2bf9357 100644 --- a/python/dgl/_ffi/function.py +++ b/python/dgl/_ffi/function.py @@ -51,7 +51,7 @@ class Function(_FunctionBase): dgl.register_func: How to register global function. dgl.get_global_func: How to get global function. """ - pass + pass # pylint: disable=unnecessary-pass class ModuleBase(object): diff --git a/python/dgl/_ffi/runtime_ctypes.py b/python/dgl/_ffi/runtime_ctypes.py index c2543cd33706..7ed7cd0bb5bd 100644 --- a/python/dgl/_ffi/runtime_ctypes.py +++ b/python/dgl/_ffi/runtime_ctypes.py @@ -1,5 +1,5 @@ """Common runtime ctypes.""" -# pylint: disable=invalid-name +# pylint: disable=invalid-name, super-init-not-called from __future__ import absolute_import import ctypes diff --git a/python/dgl/backend/mxnet/immutable_graph_index.py b/python/dgl/backend/mxnet/immutable_graph_index.py index 3ae84abfa9ba..46947715d97d 100644 --- a/python/dgl/backend/mxnet/immutable_graph_index.py +++ b/python/dgl/backend/mxnet/immutable_graph_index.py @@ -349,6 +349,31 @@ def from_coo_matrix(self, out_coo): self.__init__(mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), shape=(size, size)).astype(np.int64), mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), shape=(size, size)).astype(np.int64)) + def from_edge_list(self, elist): + """Convert from an edge list. + + Parameters + ---------- + elist : list + List of (u, v) edge tuples. + """ + src, dst = zip(*elist) + src = np.array(src) + dst = np.array(dst) + num_nodes = max(src.max(), dst.max()) + 1 + min_nodes = min(src.min(), dst.min()) + if min_nodes != 0: + raise DGLError('Invalid edge list. Nodes must start from 0.') + edge_ids = mx.nd.arange(0, len(src), step=1, repeat=1, dtype=np.int32) + src = mx.nd.array(src, dtype=np.int64) + dst = mx.nd.array(dst, dtype=np.int64) + # TODO we can't generate a csr_matrix with np.int64 directly. + in_csr = mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), + shape=(num_nodes, num_nodes)).astype(np.int64) + out_csr = mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), + shape=(num_nodes, num_nodes)).astype(np.int64) + self.__init__(in_csr, out_csr) + def create_immutable_graph_index(in_csr=None, out_csr=None): """ Create an empty backend-specific immutable graph index.
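For readers unfamiliar with the dual-CSR layout, here is a minimal standalone sketch of the construction that `from_edge_list` performs above, written with numpy/scipy in place of the MXNet ndarray API. The helper name is illustrative only and not part of DGL; it mirrors the zero-based node validation and the (in, out) CSR pair built above.

```python
# Illustrative sketch (not DGL API): the edge-list -> (in_csr, out_csr) construction
# that from_edge_list performs above, using scipy instead of mx.nd.sparse.
import numpy as np
import scipy.sparse as sp

def edge_list_to_csr_pair(elist):
    src, dst = map(np.asarray, zip(*elist))
    if min(src.min(), dst.min()) != 0:
        raise ValueError('Invalid edge list. Nodes must start from 0.')
    num_nodes = int(max(src.max(), dst.max())) + 1
    eids = np.arange(len(src))  # edge ids are stored as the nonzero values
    in_csr = sp.csr_matrix((eids, (dst, src)), shape=(num_nodes, num_nodes))
    out_csr = sp.csr_matrix((eids, (src, dst)), shape=(num_nodes, num_nodes))
    return in_csr, out_csr

in_csr, out_csr = edge_list_to_csr_pair([(0, 1), (1, 2), (2, 0)])
```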
diff --git a/python/dgl/base.py b/python/dgl/base.py index a16e5d698729..58c4336b3c51 100644 --- a/python/dgl/base.py +++ b/python/dgl/base.py @@ -3,12 +3,15 @@ import warnings -from ._ffi.base import DGLError +from ._ffi.base import DGLError # pylint: disable=unused-import -# A special argument for selecting all nodes/edges. +# A special symbol for selecting all nodes or edges. ALL = "__ALL__" def is_all(arg): + """Return true if the argument is a special symbol for all nodes or edges.""" return isinstance(arg, str) and arg == ALL -dgl_warning = warnings.warn +def dgl_warning(msg): + """Print out warning messages.""" + warnings.warn(msg) diff --git a/python/dgl/batched_graph.py b/python/dgl/batched_graph.py index a4af371683b0..a952d0e51b16 100644 --- a/python/dgl/batched_graph.py +++ b/python/dgl/batched_graph.py @@ -1,10 +1,10 @@ """Classes and functions for batching multiple graphs together.""" from __future__ import absolute_import +from collections.abc import Iterable import numpy as np -from collections import Iterable -from .base import ALL, is_all +from .base import ALL, is_all, DGLError from .frame import FrameRef, Frame from .graph import DGLGraph from . import graph_index as gi @@ -152,8 +152,7 @@ def _init_attrs(attrs, mode): elif is_all(attrs): attrs = set() # Check if at least a graph has mode items and associated features. - for i in range(len(graph_list)): - g = graph_list[i] + for i, g in enumerate(graph_list): g_num_items, g_attrs = _get_num_item_and_attr_types(g, mode) if g_num_items > 0 and len(g_attrs) > 0: attrs = g_attrs @@ -161,13 +160,13 @@ def _init_attrs(attrs, mode): break # Check if all the graphs with mode items have the same associated features. if len(attrs) > 0: - for i in range(len(graph_list)): + for i, g in enumerate(graph_list): g = graph_list[i] g_num_items, g_attrs = _get_num_item_and_attr_types(g, mode) if g_attrs != attrs and g_num_items > 0: - raise ValueError('Expect graph {} and {} to have the same {} ' - 'attributes when {}_attrs=ALL, got {} and ' - '{}'.format(ref_g_index, i, mode, mode, attrs, g_attrs)) + raise ValueError('Expect graph {0} and {1} to have the same {2} ' + 'attributes when {2}_attrs=ALL, got {3} and {4}.' + .format(ref_g_index, i, mode, attrs, g_attrs)) return attrs elif isinstance(attrs, str): return [attrs] @@ -200,25 +199,24 @@ def _init_attrs(attrs, mode): for key in edge_attrs} batched_edge_frame = FrameRef(Frame(cols)) - super(BatchedDGLGraph, self).__init__( - graph_data=batched_index, - node_frame=batched_node_frame, - edge_frame=batched_edge_frame) + super(BatchedDGLGraph, self).__init__(graph_data=batched_index, + node_frame=batched_node_frame, + edge_frame=batched_edge_frame) # extra members self._batch_size = 0 self._batch_num_nodes = [] self._batch_num_edges = [] - for gr in graph_list: - if isinstance(gr, BatchedDGLGraph): + for grh in graph_list: + if isinstance(grh, BatchedDGLGraph): # handle the input is again a batched graph. 
- self._batch_size += gr._batch_size - self._batch_num_nodes += gr._batch_num_nodes - self._batch_num_edges += gr._batch_num_edges + self._batch_size += grh._batch_size + self._batch_num_nodes += grh._batch_num_nodes + self._batch_num_edges += grh._batch_num_edges else: self._batch_size += 1 - self._batch_num_nodes.append(gr.number_of_nodes()) - self._batch_num_edges.append(gr.number_of_edges()) + self._batch_num_nodes.append(grh.number_of_nodes()) + self._batch_num_edges.append(grh.number_of_edges()) @property def batch_size(self): @@ -251,33 +249,33 @@ def batch_num_edges(self): return self._batch_num_edges # override APIs - def add_nodes(self, num, reprs=None): + def add_nodes(self, num, data=None): """Add nodes. Disabled because BatchedDGLGraph is read-only.""" - raise RuntimeError('Readonly graph. Mutation is not allowed.') + raise DGLError('Readonly graph. Mutation is not allowed.') - def add_edge(self, u, v, reprs=None): + def add_edge(self, u, v, data=None): """Add one edge. Disabled because BatchedDGLGraph is read-only.""" - raise RuntimeError('Readonly graph. Mutation is not allowed.') + raise DGLError('Readonly graph. Mutation is not allowed.') - def add_edges(self, u, v, reprs=None): + def add_edges(self, u, v, data=None): """Add many edges. Disabled because BatchedDGLGraph is read-only.""" - raise RuntimeError('Readonly graph. Mutation is not allowed.') + raise DGLError('Readonly graph. Mutation is not allowed.') # new APIs def __getitem__(self, idx): """Slice the batch and return the batch of graphs specified by the idx.""" # TODO - pass + raise NotImplementedError def __setitem__(self, idx, val): """Set the value of the slice. The graph size cannot be changed.""" # TODO - pass + raise NotImplementedError -def split(graph_batch, num_or_size_splits): +def split(graph_batch, num_or_size_splits): # pylint: disable=unused-argument """Split the batch.""" # TODO(minjie): could follow torch.split syntax - pass + raise NotImplementedError def unbatch(graph): """Return the list of graphs in this batch. 
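The bookkeeping above (`batch_size`, `batch_num_nodes`, `batch_num_edges`) is easiest to see from the user side. A hedged usage sketch follows, assuming the PyTorch backend; it relies only on APIs that appear in this file (`batch`, `unbatch`, `sum_nodes`) and the standard `DGLGraph` mutators.

```python
# Hedged usage sketch (PyTorch backend assumed).
import torch
import dgl

g1 = dgl.DGLGraph()
g1.add_nodes(3)
g1.add_edges([0, 1], [1, 2])
g1.ndata['h'] = torch.zeros((3, 4))

g2 = dgl.DGLGraph()
g2.add_nodes(2)
g2.add_edges([0], [1])
g2.ndata['h'] = torch.ones((2, 4))

bg = dgl.batch([g1, g2])        # BatchedDGLGraph
print(bg.batch_size)            # 2
print(bg.batch_num_nodes)       # [3, 2]
hg = dgl.sum_nodes(bg, 'h')     # per-graph readout, shape (2, 4)
g1_back, g2_back = dgl.unbatch(bg)
```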
@@ -308,18 +306,18 @@ def unbatch(graph): """ assert isinstance(graph, BatchedDGLGraph) bsize = graph.batch_size - bn = graph.batch_num_nodes - be = graph.batch_num_edges - pttns = gi.disjoint_partition(graph._graph, utils.toindex(bn)) + bnn = graph.batch_num_nodes + bne = graph.batch_num_edges + pttns = gi.disjoint_partition(graph._graph, utils.toindex(bnn)) # split the frames - node_frames = [FrameRef(Frame(num_rows=n)) for n in bn] - edge_frames = [FrameRef(Frame(num_rows=n)) for n in be] + node_frames = [FrameRef(Frame(num_rows=n)) for n in bnn] + edge_frames = [FrameRef(Frame(num_rows=n)) for n in bne] for attr, col in graph._node_frame.items(): - col_splits = F.split(col, bn, dim=0) + col_splits = F.split(col, bnn, dim=0) for i in range(bsize): node_frames[i][attr] = col_splits[i] for attr, col in graph._edge_frame.items(): - col_splits = F.split(col, be, dim=0) + col_splits = F.split(col, bne, dim=0) for i in range(bsize): edge_frames[i][attr] = col_splits[i] return [DGLGraph(graph_data=pttns[i], @@ -355,47 +353,63 @@ def batch(graph_list, node_attrs=ALL, edge_attrs=ALL): return BatchedDGLGraph(graph_list, node_attrs, edge_attrs) -_readout_on_attrs = { - 'nodes': ('ndata', 'batch_num_nodes', 'number_of_nodes'), - 'edges': ('edata', 'batch_num_edges', 'number_of_edges'), - } +READOUT_ON_ATTRS = { + 'nodes': ('ndata', 'batch_num_nodes', 'number_of_nodes'), + 'edges': ('edata', 'batch_num_edges', 'number_of_edges'), +} -def _sum_on(graph, on, input, weight): - data_attr, batch_num_objs_attr, num_objs_attr = _readout_on_attrs[on] +def _sum_on(graph, typestr, feat, weight): + """Internal function to sum node or edge features. + + Parameters + ---------- + graph : DGLGraph + The graph. + typestr : str + 'nodes' or 'edges' + feat : str + The feature field name. + weight : str + The weight field name. + + Returns + ------- + Tensor + The (weighted) summed node or edge features. + """ + data_attr, batch_num_objs_attr, _ = READOUT_ON_ATTRS[typestr] data = getattr(graph, data_attr) - input = data[input] + feat = data[feat] if weight is not None: weight = data[weight] - weight = F.reshape(weight, (-1,) + (1,) * (F.ndim(input) - 1)) - input = weight * input + weight = F.reshape(weight, (-1,) + (1,) * (F.ndim(feat) - 1)) + feat = weight * feat if isinstance(graph, BatchedDGLGraph): n_graphs = graph.batch_size batch_num_objs = getattr(graph, batch_num_objs_attr) - n_objs = getattr(graph, num_objs_attr)() - seg_id = F.zerocopy_from_numpy( - np.arange(n_graphs, dtype='int64').repeat(batch_num_objs)) - seg_id = F.copy_to(seg_id, F.context(input)) - y = F.unsorted_1d_segment_sum(input, seg_id, n_graphs, 0) + seg_id = F.zerocopy_from_numpy(np.arange(n_graphs, dtype='int64').repeat(batch_num_objs)) + seg_id = F.copy_to(seg_id, F.context(feat)) + y = F.unsorted_1d_segment_sum(feat, seg_id, n_graphs, 0) return y else: - return F.sum(input, 0) + return F.sum(feat, 0) -def sum_nodes(graph, input, weight=None): - """Sums all the values of node field :attr:`input` in :attr:`graph`, optionally +def sum_nodes(graph, feat, weight=None): + """Sums all the values of node field :attr:`feat` in :attr:`graph`, optionally multiplies the field by a scalar node field :attr:`weight`. Parameters ---------- - graph : DGLGraph or BatchedDGLGraph - The graph - input : str - The input field + graph : DGLGraph. + The graph. + feat : str + The feature field. weight : str, optional The weight field. If None, no weighting will be performed, - otherwise, weight each node feature with field :attr:`input`. 
+ otherwise, weight each node feature with field :attr:`feat`. for summation. The weight feature associated in the :attr:`graph` should be a tensor of shape ``[graph.number_of_nodes(), 1]``. @@ -450,21 +464,21 @@ def sum_nodes(graph, input, weight=None): sum_edges mean_edges """ - return _sum_on(graph, 'nodes', input, weight) + return _sum_on(graph, 'nodes', feat, weight) -def sum_edges(graph, input, weight=None): - """Sums all the values of edge field :attr:`input` in :attr:`graph`, +def sum_edges(graph, feat, weight=None): + """Sums all the values of edge field :attr:`feat` in :attr:`graph`, optionally multiplies the field by a scalar edge field :attr:`weight`. Parameters ---------- - graph : DGLGraph or BatchedDGLGraph - The graph - input : str - The input field + graph : DGLGraph + The graph. + feat : str + The feature field. weight : str, optional The weight field. If None, no weighting will be performed, - otherwise, weight each edge feature with field :attr:`input`. + otherwise, weight each edge feature with field :attr:`feat`. for summation. The weight feature associated in the :attr:`graph` should be a tensor of shape ``[graph.number_of_edges(), 1]``. @@ -521,54 +535,70 @@ def sum_edges(graph, input, weight=None): mean_nodes mean_edges """ - return _sum_on(graph, 'edges', input, weight) + return _sum_on(graph, 'edges', feat, weight) -def _mean_on(graph, on, input, weight): - data_attr, batch_num_objs_attr, num_objs_attr = _readout_on_attrs[on] +def _mean_on(graph, typestr, feat, weight): + """Internal function to average node or edge features. + + Parameters + ---------- + graph : DGLGraph + The graph. + typestr : str + 'nodes' or 'edges' + feat : str + The feature field name. + weight : str + The weight field name. + + Returns + ------- + Tensor + The (weighted) averaged node or edge features. + """ + data_attr, batch_num_objs_attr, _ = READOUT_ON_ATTRS[typestr] data = getattr(graph, data_attr) - input = data[input] + feat = data[feat] if weight is not None: weight = data[weight] - weight = F.reshape(weight, (-1,) + (1,) * (F.ndim(input) - 1)) - input = weight * input + weight = F.reshape(weight, (-1,) + (1,) * (F.ndim(feat) - 1)) + feat = weight * feat if isinstance(graph, BatchedDGLGraph): n_graphs = graph.batch_size batch_num_objs = getattr(graph, batch_num_objs_attr) - n_objs = getattr(graph, num_objs_attr)() - seg_id = F.zerocopy_from_numpy( - np.arange(n_graphs, dtype='int64').repeat(batch_num_objs)) - seg_id = F.copy_to(seg_id, F.context(input)) + seg_id = F.zerocopy_from_numpy(np.arange(n_graphs, dtype='int64').repeat(batch_num_objs)) + seg_id = F.copy_to(seg_id, F.context(feat)) if weight is not None: w = F.unsorted_1d_segment_sum(weight, seg_id, n_graphs, 0) - y = F.unsorted_1d_segment_sum(input, seg_id, n_graphs, 0) + y = F.unsorted_1d_segment_sum(feat, seg_id, n_graphs, 0) y = y / w else: - y = F.unsorted_1d_segment_mean(input, seg_id, n_graphs, 0) + y = F.unsorted_1d_segment_mean(feat, seg_id, n_graphs, 0) return y else: if weight is None: - return F.mean(input, 0) + return F.mean(feat, 0) else: - y = F.sum(input, 0) / F.sum(weight, 0) + y = F.sum(feat, 0) / F.sum(weight, 0) return y -def mean_nodes(graph, input, weight=None): - """Averages all the values of node field :attr:`input` in :attr:`graph`, +def mean_nodes(graph, feat, weight=None): - """Averages all the values of node field :attr:`feat` in :attr:`graph`, optionally multiplies the field by a scalar node field :attr:`weight`.
Parameters ---------- graph : DGLGraph or BatchedDGLGraph - The graph - input : str - The input field + The graph. + feat : str + The feature field. weight : str, optional The weight field. If None, no weighting will be performed, - otherwise, weight each node feature with field :attr:`input`. + otherwise, weight each node feature with field :attr:`feat`. for calculating mean. The weight feature associated in the :attr:`graph` should be a tensor of shape ``[graph.number_of_nodes(), 1]``. @@ -623,21 +653,21 @@ def mean_nodes(graph, input, weight=None): sum_edges mean_edges """ - return _mean_on(graph, 'nodes', input, weight) + return _mean_on(graph, 'nodes', feat, weight) -def mean_edges(graph, input, weight=None): - """Averages all the values of edge field :attr:`input` in :attr:`graph`, +def mean_edges(graph, feat, weight=None): + """Averages all the values of edge field :attr:`feat` in :attr:`graph`, optionally multiplies the field by a scalar edge field :attr:`weight`. Parameters ---------- - graph : DGLGraph or BatchedDGLGraph - The graph - input : str - The input field + graph : DGLGraph + The graph. + feat : str + The feature field. weight : optional, str The weight field. If None, no weighting will be performed, - otherwise, weight each edge feature with field :attr:`input`. + otherwise, weight each edge feature with field :attr:`feat`. for calculating mean. The weight feature associated in the :attr:`graph` should be a tensor of shape ``[graph.number_of_edges(), 1]``. @@ -694,4 +724,4 @@ def mean_edges(graph, input, weight=None): mean_nodes sum_edges """ - return _mean_on(graph, 'edges', input, weight) + return _mean_on(graph, 'edges', feat, weight) diff --git a/python/dgl/frame.py b/python/dgl/frame.py index b4e6a60cc860..ed13c9a146ba 100644 --- a/python/dgl/frame.py +++ b/python/dgl/frame.py @@ -1,7 +1,8 @@ """Columnar storage for DGLGraph.""" from __future__ import absolute_import -from collections import MutableMapping, namedtuple +from collections import namedtuple +from collections.abc import MutableMapping import sys import numpy as np @@ -39,6 +40,18 @@ def _reconstruct_scheme(cls, shape, dtype_str): return cls(shape, dtype) def infer_scheme(tensor): + """Infer column scheme from the given tensor data. + + Parameters + ---------- + tensor : Tensor + The tensor data. + + Returns + ------- + Scheme + The column scheme. + """ return Scheme(tuple(F.shape(tensor)[1:]), F.dtype(tensor)) class Column(object): @@ -64,6 +77,7 @@ def __len__(self): @property def shape(self): + """Return the scheme shape (feature shape) of this column.""" return self.scheme.shape def __getitem__(self, idx): @@ -71,7 +85,7 @@ def __getitem__(self, idx): Parameters ---------- - idx : slice or utils.Index + idx : utils.Index The index. Returns @@ -79,8 +93,9 @@ def __getitem__(self, idx): Tensor The feature data """ - if isinstance(idx, slice): - return self.data[idx] + if idx.slice_data() is not None: + slc = idx.slice_data() + return F.narrow_row(self.data, slc.start, slc.stop) else: user_idx = idx.tousertensor(F.context(self.data)) return F.gather_row(self.data, user_idx) @@ -105,7 +120,7 @@ def update(self, idx, feats, inplace): Parameters ---------- - idx : utils.Index or slice + idx : utils.Index The index. feats : Tensor The new features. @@ -115,22 +130,21 @@ def update(self, idx, feats, inplace): feat_scheme = infer_scheme(feats) if feat_scheme != self.scheme: raise DGLError("Cannot update column of scheme %s using feature of scheme %s."
- % (feat_scheme, self.scheme)) - - if isinstance(idx, utils.Index): - idx = idx.tousertensor(F.context(self.data)) + % (feat_scheme, self.scheme)) if inplace: + idx = idx.tousertensor(F.context(self.data)) F.scatter_row_inplace(self.data, idx, feats) + elif idx.slice_data() is not None: + # for contiguous indices narrow+concat is usually faster than scatter row + slc = idx.slice_data() + part1 = F.narrow_row(self.data, 0, slc.start) + part2 = feats + part3 = F.narrow_row(self.data, slc.stop, len(self)) + self.data = F.cat([part1, part2, part3], dim=0) else: - if isinstance(idx, slice): - # for contiguous indices pack is usually faster than scatter row - part1 = F.narrow_row(self.data, 0, idx.start) - part2 = feats - part3 = F.narrow_row(self.data, idx.stop, len(self)) - self.data = F.cat([part1, part2, part3], dim=0) - else: - self.data = F.scatter_row(self.data, idx, feats) + idx = idx.tousertensor(F.context(self.data)) + self.data = F.scatter_row(self.data, idx, feats) def extend(self, feats, feat_scheme=None): """Extend the feature data. @@ -143,11 +157,11 @@ def extend(self, feats, feat_scheme=None): The scheme """ if feat_scheme is None: - feat_scheme = Scheme.infer_scheme(feats) + feat_scheme = infer_scheme(feats) if feat_scheme != self.scheme: raise DGLError("Cannot update column of scheme %s using feature of scheme %s." - % (feat_scheme, self.scheme)) + % (feat_scheme, self.scheme)) feats = F.copy_to(feats, F.context(self.data)) self.data = F.cat([self.data, feats], dim=0) @@ -314,9 +328,9 @@ def add_column(self, name, scheme, ctx): return if self.get_initializer(name) is None: self._warn_and_set_initializer() - init_data = self.get_initializer(name)( - (self.num_rows,) + scheme.shape, scheme.dtype, - ctx, slice(0, self.num_rows)) + initializer = self.get_initializer(name) + init_data = initializer((self.num_rows,) + scheme.shape, scheme.dtype, + ctx, slice(0, self.num_rows)) self._columns[name] = Column(init_data, scheme) def add_rows(self, num_rows): @@ -336,9 +350,9 @@ def add_rows(self, num_rows): ctx = F.context(col.data) if self.get_initializer(key) is None: self._warn_and_set_initializer() - new_data = self.get_initializer(key)( - (num_rows,) + scheme.shape, scheme.dtype, - ctx, slice(self._num_rows, self._num_rows + num_rows)) + initializer = self.get_initializer(key) + new_data = initializer((num_rows,) + scheme.shape, scheme.dtype, + ctx, slice(self._num_rows, self._num_rows + num_rows)) feat_placeholders[key] = new_data self._append(Frame(feat_placeholders)) self._num_rows += num_rows @@ -368,17 +382,17 @@ def _append(self, other): else: # pad columns that are not provided in the other frame with initial values for key, col in self.items(): - if key not in other: - scheme = col.scheme - ctx = F.context(col.data) - if self.get_initializer(key) is None: - self._warn_and_set_initializer() - new_data = self.get_initializer(key)( - (other.num_rows,) + scheme.shape, scheme.dtype, - ctx, slice(self._num_rows, - self._num_rows + other.num_rows) - ) - other[key] = new_data + if key in other: + continue + scheme = col.scheme + ctx = F.context(col.data) + if self.get_initializer(key) is None: + self._warn_and_set_initializer() + initializer = self.get_initializer(key) + new_data = initializer((other.num_rows,) + scheme.shape, + scheme.dtype, ctx, + slice(self._num_rows, self._num_rows + other.num_rows)) + other[key] = new_data # append other to self for key, col in other.items(): if key not in self._columns: @@ -428,23 +442,19 @@ class FrameRef(MutableMapping): frame : Frame, 
optional The underlying frame. If not given, the reference will point to a new empty frame. - index : iterable, slice, or int, optional + index : utils.Index, optional The rows that are referenced in the underlying frame. If not given, the whole frame is referenced. The index should be distinct (no duplication is allowed). - - Note that if a slice is given, the step must be None. """ def __init__(self, frame=None, index=None): self._frame = frame if frame is not None else Frame() + # TODO(minjie): check no duplication + assert index is None or isinstance(index, utils.Index) if index is None: - # _index_data can be either a slice or an iterable - self._index_data = slice(0, self._frame.num_rows) + self._index = utils.toindex(slice(0, self._frame.num_rows)) else: - # TODO(minjie): check no duplication - self._index_data = index - self._index = None - self._index_or_slice = None + self._index = index @property def schemes(self): @@ -465,11 +475,7 @@ def num_columns(self): @property def num_rows(self): """Return the number of rows referred.""" - if isinstance(self._index_data, slice): - # NOTE: we always assume that slice.step is None - return self._index_data.stop - self._index_data.start - else: - return len(self._index_data) + return len(self._index) def set_initializer(self, initializer, column=None): """Set the initializer for empty values. @@ -500,38 +506,6 @@ def get_initializer(self, column=None): """ return self._frame.get_initializer(column) - def index(self): - """Return the index object. - - Returns - ------- - utils.Index - The index. - """ - if self._index is None: - if self.is_contiguous(): - self._index = utils.toindex( - F.arange(self._index_data.start, - self._index_data.stop)) - else: - self._index = utils.toindex(self._index_data) - return self._index - - def index_or_slice(self): - """Returns the index object or the slice - - Returns - ------- - utils.Index or slice - The index or slice - """ - if self._index_or_slice is None: - if self.is_contiguous(): - self._index_or_slice = self._index_data - else: - self._index_or_slice = utils.toindex(self._index_data) - return self._index_or_slice - def __contains__(self, name): """Return whether the column name exists.""" return name in self._frame @@ -567,7 +541,7 @@ def __getitem__(self, key): Parameters ---------- - key : str or utils.Index or slice + key : str or utils.Index The key. Returns @@ -575,12 +549,11 @@ def __getitem__(self, key): Tensor or lazy dict or tensors Depends on whether it is a column selection or row selection. """ + if not isinstance(key, (str, utils.Index)): + raise DGLError('Argument "key" must be either str or utils.Index type.') if isinstance(key, str): return self.select_column(key) - elif isinstance(key, slice) and key == slice(0, self.num_rows): - # shortcut for selecting all the rows - return self - elif isinstance(key, utils.Index) and key.is_slice(0, self.num_rows): + elif key.is_slice(0, self.num_rows): # shortcut for selecting all the rows return self else: @@ -606,7 +579,7 @@ def select_column(self, name): if self.is_span_whole_column(): return col.data else: - return col[self.index_or_slice()] + return col[self._index] def select_rows(self, query): """Return the rows given the query. @@ -625,9 +598,22 @@ def select_rows(self, query): return utils.LazyDict(lambda key: self._frame[key][rows], keys=self.keys()) def __setitem__(self, key, val): - self.set_item_inplace(key, val, inplace=False) + """Update the data in the frame. The update is done out-of-place. 
+ + Parameters + ---------- + key : str or utils.Index + The key. + val : Tensor or dict of tensors + The value. - def set_item_inplace(self, key, val, inplace): + See Also + -------- + update + """ + self.update_data(key, val, inplace=False) + + def update_data(self, key, val, inplace): """Update the data in the frame. If the provided key is string, the corresponding column data will be updated. @@ -649,14 +635,14 @@ def set_item_inplace(self, key, val, inplace): inplace: bool If True, update will be done in place """ + if not isinstance(key, (str, utils.Index)): + raise DGLError('Argument "key" must be either str or utils.Index type.') if isinstance(key, str): self.update_column(key, val, inplace=inplace) - elif isinstance(key, slice) and key == slice(0, self.num_rows): + elif key.is_slice(0, self.num_rows): # shortcut for updating all the rows - return self.update(val) - elif isinstance(key, utils.Index) and key.is_slice(0, self.num_rows): - # shortcut for selecting all the rows - return self.update(val) + for colname, col in val.items(): + self.update_column(colname, col, inplace=inplace) else: self.update_rows(key, val, inplace=inplace) @@ -683,15 +669,14 @@ def update_column(self, name, data, inplace): col = Column.create(data) if self.num_columns == 0: # the frame is empty - self._index_data = slice(0, len(col)) - self._clear_cache() + self._index = utils.toindex(slice(0, len(col))) self._frame[name] = col else: if name not in self._frame: ctx = F.context(data) self._frame.add_column(name, infer_scheme(data), ctx) fcol = self._frame[name] - fcol.update(self.index_or_slice(), data, inplace) + fcol.update(self._index, data, inplace) def add_rows(self, num_rows): """Add blank rows to the underlying frame. @@ -700,7 +685,7 @@ def add_rows(self, num_rows): initializers. Note: only available for FrameRef that spans the whole column. The row - span will extend to new rows. Other FrameRefs referencing the same + span will extend to new rows. Other FrameRefs referencing the same frame will not be affected. Parameters @@ -711,10 +696,14 @@ def add_rows(self, num_rows): if not self.is_span_whole_column(): raise RuntimeError('FrameRef not spanning whole column.') self._frame.add_rows(num_rows) - if self.is_contiguous(): - self._index_data = slice(0, self._index_data.stop + num_rows) + if self._index.slice_data() is not None: + # the index is a slice + slc = self._index.slice_data() + self._index = utils.toindex(slice(slc.start, slc.stop + num_rows)) else: - self._index_data.extend(range(self.num_rows, self.num_rows + num_rows)) + selfidxdata = self._index.tousertensor() + newdata = F.arange(self.num_rows, self.num_rows + num_rows) + self._index = utils.toindex(F.cat([selfidxdata, newdata], dim=0)) def update_rows(self, query, data, inplace): """Update the rows. @@ -759,6 +748,8 @@ def __delitem__(self, key): key : str or utils.Index The key. """ + if not isinstance(key, (str, utils.Index)): + raise DGLError('Argument "key" must be either str or utils.Index type.') if isinstance(key, str): del self._frame[key] else: @@ -769,22 +760,16 @@ def delete_rows(self, query): Please note that "deleted" rows are not really deleted, but simply removed in the reference. As a result, if two FrameRefs point to the same Frame, deleting - from one ref will not relect on the other. By contrast, deleting columns is real. + from one ref will not reflect on the other. By contrast, deleting columns is real. Parameters ---------- - query : utils.Index or slice + query : utils.Index The rows to be deleted. 
""" - if isinstance(query, slice): - query = range(query.start, query.stop) - else: - query = query.tonumpy() - - if isinstance(self._index_data, slice): - self._index_data = range(self._index_data.start, self._index_data.stop) - self._index_data = list(np.delete(self._index_data, query)) - self._clear_cache() + query = query.tonumpy() + index = self._index.tonumpy() + self._index = utils.toindex(np.delete(index, query)) def append(self, other): """Append another frame into this one. @@ -794,59 +779,50 @@ def append(self, other): other : dict of str to tensor The data to be appended. """ - span_whole = self.is_span_whole_column() - contiguous = self.is_contiguous() old_nrows = self._frame.num_rows self._frame.append(other) + new_nrows = self._frame.num_rows # update index - if span_whole: - self._index_data = slice(0, self._frame.num_rows) - elif contiguous: - if self._index_data.stop == old_nrows: - new_idx = slice(self._index_data.start, self._frame.num_rows) - else: - new_idx = list(range(self._index_data.start, self._index_data.stop)) - new_idx.extend(range(old_nrows, self._frame.num_rows)) - self._index_data = new_idx - self._clear_cache() + if (self._index.slice_data() is not None + and self._index.slice_data().stop == old_nrows): + # Self index is a slice and index.stop is equal to the size of the + # underlying frame. Can still use a slice for the new index. + oldstart = self._index.slice_data().start + self._index = utils.toindex(slice(oldstart, new_nrows)) + else: + # convert it to user tensor and concat + selfidxdata = self._index.tousertensor() + newdata = F.arange(old_nrows, new_nrows) + self._index = utils.toindex(F.cat([selfidxdata, newdata], dim=0)) def clear(self): """Clear the frame.""" self._frame.clear() - self._index_data = slice(0, 0) - self._clear_cache() + self._index = utils.toindex(slice(0, 0)) def is_contiguous(self): """Return whether this refers to a contiguous range of rows.""" # NOTE: this check could have false negatives - # NOTE: we always assume that slice.step is None - return isinstance(self._index_data, slice) + return self._index.slice_data() is not None def is_span_whole_column(self): """Return whether this refers to all the rows.""" return self.is_contiguous() and self.num_rows == self._frame.num_rows def _getrows(self, query): - """Internal function to convert from the local row ids to the row ids of the frame.""" - if self.is_contiguous(): - start = self._index_data.start - if start == 0: - # shortcut for identical mapping - return query - elif isinstance(query, slice): - return slice(query.start + start, query.stop + start) - else: - query = query.tousertensor() - return utils.toindex(query + start) - else: - idxtensor = self.index().tousertensor() - query = query.tousertensor() - return utils.toindex(F.gather_row(idxtensor, query)) - - def _clear_cache(self): - """Internal function to clear the cached object.""" - self._index = None - self._index_or_slice = None + """Internal function to convert from the local row ids to the row ids of the frame. + + Parameters + ---------- + query : utils.Index + The query index. + + Returns + ------- + utils.Index + The actual index to the underlying frame. + """ + return self._index.get_items(query) def frame_like(other, num_rows): """Create a new frame that has the same scheme as the given one. 
diff --git a/python/dgl/function/__init__.py b/python/dgl/function/__init__.py index e1aac4c121c8..529687bb3d24 100644 --- a/python/dgl/function/__init__.py +++ b/python/dgl/function/__init__.py @@ -1,4 +1,5 @@ """DGL builtin functors""" +# pylint: disable=redefined-builtin from __future__ import absolute_import from .message import * diff --git a/python/dgl/function/base.py b/python/dgl/function/base.py index fb01079aba29..aa5fb7526138 100644 --- a/python/dgl/function/base.py +++ b/python/dgl/function/base.py @@ -1,26 +1,32 @@ """Built-in function base class""" from __future__ import absolute_import +__all__ = ['BuiltinFunction', 'BundledFunction'] + class BuiltinFunction(object): """Base builtin function class.""" - - def __call__(self): - """Regular computation of this builtin function - - This will be used when optimization is not available. - """ - raise NotImplementedError - @property def name(self): """Return the name of this builtin function.""" raise NotImplementedError class BundledFunction(object): + """A utility class that bundles multiple functions. + + Parameters + ---------- + fn_list : list of callable + The function list. + """ def __init__(self, fn_list): self.fn_list = fn_list def __call__(self, *args, **kwargs): + """Regular computation of this builtin function + + This will be used when optimization is not available and should + ONLY be called by DGL framework. + """ ret = {} for fn in self.fn_list: ret.update(fn(*args, **kwargs)) @@ -28,4 +34,5 @@ def __call__(self, *args, **kwargs): @property def name(self): + """Return the name.""" return "bundled" diff --git a/python/dgl/function/message.py b/python/dgl/function/message.py index af3011aa1abc..89648046f3ae 100644 --- a/python/dgl/function/message.py +++ b/python/dgl/function/message.py @@ -1,9 +1,10 @@ """Built-in message function.""" from __future__ import absolute_import -from .base import BuiltinFunction import operator -import dgl.backend as F + +from .base import BuiltinFunction +from .. import backend as F __all__ = ["src_mul_edge", "copy_src", "copy_edge"] @@ -12,9 +13,10 @@ class MessageFunction(BuiltinFunction): """Base builtin message function class.""" def __call__(self, edges): - """Regular computation of this builtin. + """Regular computation of this builtin function - This will be used when optimization is not available. + This will be used when optimization is not available and should + ONLY be called by DGL framework. """ raise NotImplementedError @@ -29,9 +31,9 @@ def is_spmv_supported(self, g): @property def use_edge_feature(self): + """Return true if the message function uses edge feature data.""" raise NotImplementedError - def _is_spmv_supported_edge_feat(g, field): """Return whether the edge feature shape supports SPMV optimization. @@ -43,6 +45,12 @@ def _is_spmv_supported_edge_feat(g, field): class SrcMulEdgeMessageFunction(MessageFunction): + """Class for the src_mul_edge builtin message function. + + See Also + -------- + src_mul_edge + """ def __init__(self, mul_op, src_field, edge_field, out_field): self.mul_op = mul_op self.src_field = src_field @@ -50,9 +58,26 @@ def __init__(self, mul_op, src_field, edge_field, out_field): self.out_field = out_field def is_spmv_supported(self, g): + """Return true if this supports SPMV optimization. + + Parameters + ---------- + g : DGLGraph + The graph. + + Returns + ------- + bool + True if this supports SPMV optimization. 
+ """ return _is_spmv_supported_edge_feat(g, self.edge_field) def __call__(self, edges): + """Regular computation of this builtin function + + This will be used when optimization is not available and should + ONLY be called by DGL framework. + """ sdata = edges.src[self.src_field] edata = edges.data[self.edge_field] # Due to the different broadcasting semantics of different backends, @@ -71,17 +96,41 @@ def name(self): @property def use_edge_feature(self): + """Return true if the message function uses edge feature data.""" return True class CopySrcMessageFunction(MessageFunction): + """Class for the copy_src builtin message function. + + See Also + -------- + copy_src + """ def __init__(self, src_field, out_field): self.src_field = src_field self.out_field = out_field def is_spmv_supported(self, g): + """Return true if this supports SPMV optimization. + + Parameters + ---------- + g : DGLGraph + The graph. + + Returns + ------- + bool + True if this supports SPMV optimization. + """ return True def __call__(self, edges): + """Regular computation of this builtin function + + This will be used when optimization is not available and should + ONLY be called by DGL framework. + """ return {self.out_field : edges.src[self.src_field]} @property @@ -90,19 +139,43 @@ def name(self): @property def use_edge_feature(self): + """Return true if the message function uses edge feature data.""" return False class CopyEdgeMessageFunction(MessageFunction): + """Class for the copy_edge builtin message function. + + See Also + -------- + copy_edge + """ def __init__(self, edge_field=None, out_field=None): self.edge_field = edge_field self.out_field = out_field def is_spmv_supported(self, g): + """Return true if this supports SPMV optimization. + + Parameters + ---------- + g : DGLGraph + The graph. + + Returns + ------- + bool + True if this supports SPMV optimization. + """ # TODO: support this with e2v spmv return False # return _is_spmv_supported_edge_feat(g, self.edge_field) def __call__(self, edges): + """Regular computation of this builtin function + + This will be used when optimization is not available and should + ONLY be called by DGL framework. + """ return {self.out_field : edges.data[self.edge_field]} @property @@ -111,9 +184,9 @@ def name(self): @property def use_edge_feature(self): + """Return true if the message function uses edge feature data.""" return True - def src_mul_edge(src, edge, out): """Builtin message function that computes message by multiplying source node features with edge features. diff --git a/python/dgl/function/reducer.py b/python/dgl/function/reducer.py index 42cf2db3e60f..ab3c2e468c70 100644 --- a/python/dgl/function/reducer.py +++ b/python/dgl/function/reducer.py @@ -1,4 +1,5 @@ """Built-in reducer function.""" +# pylint: disable=redefined-builtin from __future__ import absolute_import from .. import backend as F @@ -10,9 +11,10 @@ class ReduceFunction(BuiltinFunction): """Base builtin reduce function class.""" def __call__(self, nodes): - """Regular computation of this builtin. + """Regular computation of this builtin function - This will be used when optimization is not available. + This will be used when optimization is not available and should + ONLY be called by DGL framework. 
""" raise NotImplementedError @@ -29,18 +31,19 @@ def is_spmv_supported(self): class SimpleReduceFunction(ReduceFunction): """Builtin reduce function that aggregates a single field into another single field.""" - def __init__(self, name, op, msg_field, out_field): + def __init__(self, name, reduce_op, msg_field, out_field): self._name = name - self.op = op + self.reduce_op = reduce_op self.msg_field = msg_field self.out_field = out_field def is_spmv_supported(self): + """Return whether the SPMV optimization is supported.""" # NOTE: only sum is supported right now. return self._name == "sum" def __call__(self, nodes): - return {self.out_field : self.op(nodes.mailbox[self.msg_field], 1)} + return {self.out_field : self.reduce_op(nodes.mailbox[self.msg_field], 1)} @property def name(self): diff --git a/python/dgl/graph.py b/python/dgl/graph.py index 560a1c47a18d..52152e25c377 100644 --- a/python/dgl/graph.py +++ b/python/dgl/graph.py @@ -1,21 +1,19 @@ """Base graph class specialized for neural networks on graphs.""" from __future__ import absolute_import -import networkx as nx -import numpy as np from collections import defaultdict -import dgl -from .base import ALL, is_all, DGLError, dgl_warning +from .base import ALL, is_all, DGLError from . import backend as F +from . import init from .frame import FrameRef, Frame -from .graph_index import GraphIndex, create_graph_index +from .graph_index import create_graph_index from .runtime import ir, scheduler, Runtime +from . import subgraph from . import utils from .view import NodeView, EdgeView from .udf import NodeBatch, EdgeBatch - __all__ = ['DGLGraph'] class DGLGraph(object): @@ -177,7 +175,6 @@ def __init__(self, multigraph=False, readonly=False): # graph - self._readonly=readonly self._graph = create_graph_index(graph_data, multigraph, readonly) # node and edge frame if node_frame is None: @@ -194,7 +191,7 @@ def __init__(self, # message frame self._msg_frame = FrameRef(Frame(num_rows=self.number_of_edges())) # set initializer for message frame - self._msg_frame.set_initializer(dgl.init.zero_initializer) + self._msg_frame.set_initializer(init.zero_initializer) # registered functions self._message_func = None self._reduce_func = None @@ -916,7 +913,7 @@ def out_edges(self, v, form='uv'): else: raise DGLError('Invalid form:', form) - def all_edges(self, form='uv', sorted=False): + def all_edges(self, form='uv', return_sorted=False): """Return all the edges. Parameters @@ -927,7 +924,7 @@ def all_edges(self, form='uv', sorted=False): - 'all' : a tuple (u, v, eid) - 'uv' : a pair (u, v), default - 'eid' : one eid tensor - sorted : bool + return_sorted : bool True if the returned edges are sorted by their src and dst ids. 
Returns @@ -954,7 +951,7 @@ def all_edges(self, form='uv', sorted=False): >>> G.all_edges('all') (tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2])) """ - src, dst, eid = self._graph.edges(sorted) + src, dst, eid = self._graph.edges(return_sorted) if form == 'all': return (src.tousertensor(), dst.tousertensor(), eid.tousertensor()) elif form == 'uv': @@ -1021,7 +1018,7 @@ def in_degrees(self, v=ALL): in_degree """ if is_all(v): - v = utils.toindex(slice(0, self.number_of_nodes())) + v = utils.toindex(slice(0, self.number_of_nodes())) else: v = utils.toindex(v) return self._graph.in_degrees(v).tousertensor() @@ -1083,7 +1080,7 @@ def out_degrees(self, v=ALL): out_degree """ if is_all(v): - v = utils.toindex(slice(0, self.number_of_nodes())) + v = utils.toindex(slice(0, self.number_of_nodes())) else: v = utils.toindex(v) return self._graph.out_degrees(v).tousertensor() @@ -1121,13 +1118,13 @@ def to_networkx(self, node_attrs=None, edge_attrs=None): nx_graph = self._graph.to_networkx() if node_attrs is not None: for nid, attr in nx_graph.nodes(data=True): - nf = self.get_n_repr(nid) - attr.update({key: nf[key].squeeze(0) for key in node_attrs}) + feat_dict = self.get_n_repr(nid) + attr.update({key: feat_dict[key].squeeze(0) for key in node_attrs}) if edge_attrs is not None: - for u, v, attr in nx_graph.edges(data=True): + for _, _, attr in nx_graph.edges(data=True): eid = attr['id'] - ef = self.get_e_repr(eid) - attr.update({key: ef[key].squeeze(0) for key in edge_attrs}) + feat_dict = self.get_e_repr(eid) + attr.update({key: feat_dict[key].squeeze(0) for key in edge_attrs}) return nx_graph def from_networkx(self, nx_graph, node_attrs=None, edge_attrs=None): @@ -1208,12 +1205,12 @@ def _batcher(lst): for attr in edge_attrs: self._edge_frame[attr] = _batcher(attr_dict[attr]) - def from_scipy_sparse_matrix(self, a): + def from_scipy_sparse_matrix(self, spmat): """ Convert from scipy sparse matrix. Parameters ---------- - a : scipy sparse matrix + spmat : scipy sparse matrix The graph's adjacency matrix Examples @@ -1227,7 +1224,7 @@ def from_scipy_sparse_matrix(self, a): >>> g.from_scipy_sparse_matrix(a) """ self.clear() - self._graph.from_scipy_sparse_matrix(a) + self._graph.from_scipy_sparse_matrix(spmat) self._node_frame.add_rows(self.number_of_nodes()) self._edge_frame.add_rows(self.number_of_edges()) self._msg_index = utils.zero_index(self.number_of_edges()) @@ -1502,10 +1499,10 @@ def edata(self): """ return self.edges[:].data - def set_n_repr(self, hu, u=ALL, inplace=False): + def set_n_repr(self, data, u=ALL, inplace=False): """Set node(s) representation. - `hu` is a dictionary from the feature name to feature tensor. Each tensor + `data` is a dictionary from the feature name to feature tensor. Each tensor is of shape (B, D1, D2, ...), where B is the number of nodes to be updated, and (D1, D2, ...) be the shape of the node representation tensor. The length of the given node ids must match B (i.e, len(u) == B). @@ -1515,7 +1512,7 @@ def set_n_repr(self, hu, u=ALL, inplace=False): Parameters ---------- - hu : dict of tensor + data : dict of tensor Node representation. u : node, container or tensor The node(s). @@ -1523,25 +1520,25 @@ def set_n_repr(self, hu, u=ALL, inplace=False): If True, update will be done in place, but autograd will break. """ # sanity check - if not utils.is_dict_like(hu): + if not utils.is_dict_like(data): raise DGLError('Expect dictionary type for feature data.' - ' Got "%s" instead.' % type(hu)) + ' Got "%s" instead.' 
% type(data)) if is_all(u): num_nodes = self.number_of_nodes() else: u = utils.toindex(u) num_nodes = len(u) - for key, val in hu.items(): + for key, val in data.items(): nfeats = F.shape(val)[0] if nfeats != num_nodes: raise DGLError('Expect number of features to match number of nodes (len(u)).' ' Got %d and %d instead.' % (nfeats, num_nodes)) # set if is_all(u): - for key, val in hu.items(): + for key, val in data.items(): self._node_frame[key] = val else: - self._node_frame.update_rows(u, hu, inplace=inplace) + self._node_frame.update_rows(u, data, inplace=inplace) def get_n_repr(self, u=ALL): """Get node(s) representation. @@ -1581,10 +1578,10 @@ def pop_n_repr(self, key): """ return self._node_frame.pop(key) - def set_e_repr(self, he, edges=ALL, inplace=False): + def set_e_repr(self, data, edges=ALL, inplace=False): """Set edge(s) representation. - `he` is a dictionary from the feature name to feature tensor. Each tensor + `data` is a dictionary from the feature name to feature tensor. Each tensor is of shape (B, D1, D2, ...), where B is the number of edges to be updated, and (D1, D2, ...) be the shape of the edge representation tensor. @@ -1593,7 +1590,7 @@ def set_e_repr(self, he, edges=ALL, inplace=False): Parameters ---------- - he : tensor or dict of tensor + data : tensor or dict of tensor Edge representation. edges : edges Edges can be a pair of endpoint nodes (u, v), or a @@ -1614,16 +1611,16 @@ def set_e_repr(self, he, edges=ALL, inplace=False): eid = utils.toindex(edges) # sanity check - if not utils.is_dict_like(he): + if not utils.is_dict_like(data): raise DGLError('Expect dictionary type for feature data.' - ' Got "%s" instead.' % type(he)) + ' Got "%s" instead.' % type(data)) if is_all(eid): num_edges = self.number_of_edges() else: eid = utils.toindex(eid) num_edges = len(eid) - for key, val in he.items(): + for key, val in data.items(): nfeats = F.shape(val)[0] if nfeats != num_edges: raise DGLError('Expect number of features to match number of edges.' @@ -1631,11 +1628,11 @@ def set_e_repr(self, he, edges=ALL, inplace=False): # set if is_all(eid): # update column - for key, val in he.items(): + for key, val in data.items(): self._edge_frame[key] = val else: # update row - self._edge_frame.update_rows(eid, he, inplace=inplace) + self._edge_frame.update_rows(eid, data, inplace=inplace) def get_e_repr(self, edges=ALL): """Get node(s) representation. @@ -2491,8 +2488,7 @@ def prop_nodes(self, prop_edges """ for node_frontier in nodes_generator: - self.pull(node_frontier, - message_func, reduce_func, apply_node_func) + self.pull(node_frontier, message_func, reduce_func, apply_node_func) def prop_edges(self, edges_generator, @@ -2573,8 +2569,7 @@ def prop_edges(self, prop_nodes """ for edge_frontier in edges_generator: - self.send_and_recv(edge_frontier, - message_func, reduce_func, apply_node_func) + self.send_and_recv(edge_frontier, message_func, reduce_func, apply_node_func) def subgraph(self, nodes): """Return the subgraph induced on given nodes. 
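`prop_nodes` and `prop_edges` above simply iterate the user-supplied frontiers and call `pull` / `send_and_recv` on each. A hedged usage sketch, assuming the PyTorch backend and that `fn.copy_src` / `fn.sum` are the builtin message and reduce functions listed in `dgl.function` in this diff:

```python
# Hedged sketch of frontier-based propagation with prop_nodes (PyTorch backend assumed).
import torch
import dgl
import dgl.function as fn

g = dgl.DGLGraph()
g.add_nodes(3)
g.add_edges([0, 1], [1, 2])                      # chain: 0 -> 1 -> 2
g.ndata['h'] = torch.tensor([[1.], [0.], [0.]])

# Pull node 1 first, then node 2, so the feature flows down the chain.
frontiers = [[1], [2]]
g.prop_nodes(frontiers, fn.copy_src('h', 'm'), fn.sum('m', 'h'))
print(g.ndata['h'])                              # all three rows become 1.
```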
@@ -2621,7 +2616,7 @@ def subgraph(self, nodes): """ induced_nodes = utils.toindex(nodes) sgi = self._graph.node_subgraph(induced_nodes) - return dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, sgi) + return subgraph.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, sgi) def subgraphs(self, nodes): """Return a list of subgraphs, each induced in the corresponding given @@ -2648,8 +2643,8 @@ def subgraphs(self, nodes): """ induced_nodes = [utils.toindex(n) for n in nodes] sgis = self._graph.node_subgraphs(induced_nodes) - return [dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, - sgi) for sgi in sgis] + return [subgraph.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, sgi) + for sgi in sgis] def edge_subgraph(self, edges): """Return the subgraph induced on given edges. @@ -2695,7 +2690,7 @@ def edge_subgraph(self, edges): """ induced_edges = utils.toindex(edges) sgi = self._graph.edge_subgraph(induced_edges) - return dgl.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, sgi) + return subgraph.DGLSubGraph(self, sgi.induced_nodes, sgi.induced_edges, sgi) def adjacency_matrix(self, transpose=False, ctx=F.cpu()): """Return the adjacency matrix representation of this graph. @@ -2720,7 +2715,7 @@ def adjacency_matrix(self, transpose=False, ctx=F.cpu()): """ return self._graph.adjacency_matrix(transpose, ctx)[0] - def incidence_matrix(self, type, ctx=F.cpu()): + def incidence_matrix(self, typestr, ctx=F.cpu()): """Return the incidence matrix representation of this graph. An incidence matrix is an n x m sparse matrix, where n is @@ -2750,7 +2745,7 @@ def incidence_matrix(self, type, ctx=F.cpu()): Parameters ---------- - type : str + typestr : str Can be either ``in``, ``out`` or ``both`` ctx : context, optional (default=cpu) The context of returned incidence matrix. @@ -2760,7 +2755,7 @@ def incidence_matrix(self, type, ctx=F.cpu()): SparseTensor The incidence matrix. """ - return self._graph.incidence_matrix(type, ctx)[0] + return self._graph.incidence_matrix(typestr, ctx)[0] def line_graph(self, backtracking=True, shared=False): """Return the line graph of this graph. 
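For the structural queries touched above (`subgraph`, `adjacency_matrix`, and the `type` → `typestr` rename in `incidence_matrix`), a short hedged usage sketch with the PyTorch backend:

```python
# Hedged sketch for the structural queries above (PyTorch backend assumed).
import dgl

g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2], [1, 2, 3])

adj = g.adjacency_matrix()        # sparse (4, 4) adjacency
inc = g.incidence_matrix('in')    # sparse (4, 3) incidence; the argument is now `typestr`
sg = g.subgraph([0, 1, 2])        # DGLSubGraph induced on nodes {0, 1, 2}
print(sg.number_of_nodes(), sg.number_of_edges())   # 3 2
```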
@@ -2833,8 +2828,8 @@ def filter_nodes(self, predicate, nodes=ALL): v = utils.toindex(nodes) n_repr = self.get_n_repr(v) - nb = NodeBatch(self, v, n_repr) - n_mask = predicate(nb) + nbatch = NodeBatch(self, v, n_repr) + n_mask = predicate(nbatch) if is_all(nodes): return F.nonzero_1d(n_mask) @@ -2906,10 +2901,8 @@ def filter_edges(self, predicate, edges=ALL): src_data = self.get_n_repr(u) edge_data = self.get_e_repr(eid) dst_data = self.get_n_repr(v) - eb = EdgeBatch(self, (u, v, eid), - src_data, edge_data, dst_data) - - e_mask = predicate(eb) + ebatch = EdgeBatch(self, (u, v, eid), src_data, edge_data, dst_data) + e_mask = predicate(ebatch) if is_all(edges): return F.nonzero_1d(e_mask) @@ -2918,7 +2911,9 @@ def filter_edges(self, predicate, edges=ALL): return edges[e_mask] def __repr__(self): - s = 'DGLGraph with {node} nodes and {edge} edges.\nNode data: {ndata}\nEdge data: {edata}' - return s.format(node=self.number_of_nodes(), edge=self.number_of_edges(), - ndata=str(self.node_attr_schemes()), - edata=str(self.edge_attr_schemes())) + ret = ('DGLGraph(num_nodes={node}, num_edges={edge},\n' + ' ndata_schemes={ndata}\n' + ' edata_schemes={edata})') + return ret.format(node=self.number_of_nodes(), edge=self.number_of_edges(), + ndata=str(self.node_attr_schemes()), + edata=str(self.edge_attr_schemes())) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index ab928e70ca7a..6c9ba1c58f73 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -1,3 +1,4 @@ +"""Module for graph index class definition.""" from __future__ import absolute_import import ctypes @@ -7,7 +8,7 @@ from ._ffi.base import c_array from ._ffi.function import _init_api -from .base import DGLError, is_all +from .base import DGLError from . import backend as F from . import utils from .immutable_graph_index import create_immutable_graph_index @@ -58,7 +59,7 @@ def add_nodes(self, num): num : int Number of nodes to be added. """ - _CAPI_DGLGraphAddVertices(self._handle, num); + _CAPI_DGLGraphAddVertices(self._handle, num) self.clear_cache() def add_edge(self, u, v): @@ -71,7 +72,7 @@ def add_edge(self, u, v): v : int The dst node. """ - _CAPI_DGLGraphAddEdge(self._handle, u, v); + _CAPI_DGLGraphAddEdge(self._handle, u, v) self.clear_cache() def add_edges(self, u, v): @@ -366,12 +367,12 @@ def out_edges(self, v): return src, dst, eid @utils.cached_member(cache='_cache', prefix='edges') - def edges(self, sorted=False): + def edges(self, return_sorted=False): """Return all the edges Parameters ---------- - sorted : bool + return_sorted : bool True if the returned edges are sorted by their src and dst ids. Returns @@ -383,9 +384,9 @@ def edges(self, sorted=False): utils.Index The edge ids. 
""" - key = 'edges_s%d' % sorted + key = 'edges_s%d' % return_sorted if key not in self._cache: - edge_array = _CAPI_DGLGraphEdges(self._handle, sorted) + edge_array = _CAPI_DGLGraphEdges(self._handle, return_sorted) src = utils.toindex(edge_array(0)) dst = utils.toindex(edge_array(1)) eid = utils.toindex(edge_array(2)) @@ -505,7 +506,6 @@ def edge_subgraph(self, e): """ e_array = e.todgltensor() rst = _CAPI_DGLGraphEdgeSubgraph(self._handle, e_array) - gi = GraphIndex(rst(0)) induced_nodes = utils.toindex(rst(1)) return SubgraphIndex(rst(0), self, induced_nodes, e) @@ -555,7 +555,7 @@ def adjacency_matrix(self, transpose, ctx): return adj, shuffle_idx @utils.cached_member(cache='_cache', prefix='inc') - def incidence_matrix(self, type, ctx): + def incidence_matrix(self, typestr, ctx): """Return the incidence matrix representation of this graph. An incidence matrix is an n x m sparse matrix, where n is @@ -577,7 +577,7 @@ def incidence_matrix(self, type, ctx): Parameters ---------- - type : str + typestr : str Can be either "in", "out" or "both" ctx : context The context of returned incidence matrix. @@ -596,21 +596,21 @@ def incidence_matrix(self, type, ctx): eid = eid.tousertensor(ctx) # the index of the ctx will be cached n = self.number_of_nodes() m = self.number_of_edges() - if type == 'in': + if typestr == 'in': row = F.unsqueeze(dst, 0) col = F.unsqueeze(eid, 0) idx = F.cat([row, col], dim=0) # FIXME(minjie): data type dat = F.ones((m,), dtype=F.float32, ctx=ctx) inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m)) - elif type == 'out': + elif typestr == 'out': row = F.unsqueeze(src, 0) col = F.unsqueeze(eid, 0) idx = F.cat([row, col], dim=0) # FIXME(minjie): data type dat = F.ones((m,), dtype=F.float32, ctx=ctx) inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m)) - elif type == 'both': + elif typestr == 'both': # create index row = F.unsqueeze(F.cat([src, dst], dim=0), 0) col = F.unsqueeze(F.cat([eid, eid], dim=0), 0) @@ -625,7 +625,7 @@ def incidence_matrix(self, type, ctx): dat = F.cat([x, y], dim=0) inc, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, m)) else: - raise DGLError('Invalid incidence matrix type: %s' % str(type)) + raise DGLError('Invalid incidence matrix type: %s' % str(typestr)) shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None return inc, shuffle_idx @@ -642,8 +642,8 @@ def to_networkx(self): src, dst, eid = self.edges() ret = nx.MultiDiGraph() if self.is_multigraph() else nx.DiGraph() ret.add_nodes_from(range(self.number_of_nodes())) - for u, v, id in zip(src, dst, eid): - ret.add_edge(u, v, id=id) + for u, v, e in zip(src, dst, eid): + ret.add_edge(u, v, id=e) return ret def from_networkx(self, nx_graph): @@ -661,7 +661,7 @@ def from_networkx(self, nx_graph): if not isinstance(nx_graph, nx.Graph): nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph() - else nx.DiGraph(nx_graph)) + else nx.DiGraph(nx_graph)) else: nx_graph = nx_graph.to_directed() @@ -797,11 +797,11 @@ def induced_edges(self): def __getstate__(self): raise NotImplementedError( - "SubgraphIndex pickling is not supported yet.") + "SubgraphIndex pickling is not supported yet.") def __setstate__(self, state): raise NotImplementedError( - "SubgraphIndex unpickling is not supported yet.") + "SubgraphIndex unpickling is not supported yet.") def map_to_subgraph_nid(subgraph, parent_nids): """Map parent node Ids to the subgraph node Ids. @@ -820,7 +820,7 @@ def map_to_subgraph_nid(subgraph, parent_nids): Node Ids in the subgraph. 
""" return utils.toindex(_CAPI_DGLMapSubgraphNID(subgraph.induced_nodes.todgltensor(), - parent_nids.todgltensor())) + parent_nids.todgltensor())) def disjoint_union(graphs): """Return a disjoint union of the input graphs. @@ -868,12 +868,12 @@ def disjoint_partition(graph, num_or_size_splits): """ if isinstance(num_or_size_splits, utils.Index): rst = _CAPI_DGLDisjointPartitionBySizes( - graph._handle, - num_or_size_splits.todgltensor()) + graph._handle, + num_or_size_splits.todgltensor()) else: rst = _CAPI_DGLDisjointPartitionByNum( - graph._handle, - int(num_or_size_splits)) + graph._handle, + int(num_or_size_splits)) graphs = [] for val in rst.asnumpy(): handle = ctypes.cast(int(val), ctypes.c_void_p) @@ -891,46 +891,41 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): Whether the graph is multigraph (default is False) """ if isinstance(graph_data, GraphIndex): + # FIXME(minjie): this return is not correct for mutable graph index return graph_data - if readonly and graph_data is not None: - try: - gi = create_immutable_graph_index(graph_data) - except: - gi = None - # If we can't create an immutable graph index, we'll have to fall back. - if gi is not None: - return gi + if readonly: + return create_immutable_graph_index(graph_data) handle = _CAPI_DGLGraphCreate(multigraph) - gi = GraphIndex(handle) + gidx = GraphIndex(handle) if graph_data is None: - return gi + return gidx # edge list if isinstance(graph_data, (list, tuple)): try: - gi.from_edge_list(graph_data) - return gi - except: + gidx.from_edge_list(graph_data) + return gidx + except Exception: # pylint: disable=broad-except raise DGLError('Graph data is not a valid edge list.') # scipy format if isinstance(graph_data, scipy.sparse.spmatrix): try: - gi.from_scipy_sparse_matrix(graph_data) - return gi - except: + gidx.from_scipy_sparse_matrix(graph_data) + return gidx + except Exception: # pylint: disable=broad-except raise DGLError('Graph data is not a valid scipy sparse matrix.') # networkx - any format try: - gi.from_networkx(graph_data) - except: + gidx.from_networkx(graph_data) + except Exception: # pylint: disable=broad-except raise DGLError('Error while creating graph from input of type "%s".' - % type(graph_data)) + % type(graph_data)) - return gi + return gidx _init_api("dgl.graph_index") diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index 138224404c22..52d12d9a4dd0 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -1,6 +1,11 @@ +"""Module for immutable graph index. + +NOTE: this is currently a temporary solution. +""" +# pylint: disable=abstract-method,unused-argument + from __future__ import absolute_import -import ctypes import numpy as np import networkx as nx import scipy.sparse as sp @@ -8,7 +13,7 @@ from ._ffi.function import _init_api from . import backend as F from . import utils -from .base import ALL, is_all, DGLError +from .base import DGLError class ImmutableGraphIndex(object): """Graph index object on immutable graphs. @@ -27,7 +32,7 @@ def __init__(self, backend_sparse): def add_nodes(self, num): """Add nodes. - + Parameters ---------- num : int @@ -37,7 +42,7 @@ def add_nodes(self, num): def add_edge(self, u, v): """Add one edge. - + Parameters ---------- u : int @@ -49,7 +54,7 @@ def add_edge(self, u, v): def add_edges(self, u, v): """Add many edges. 
- + Parameters ---------- u : utils.Index @@ -229,8 +234,8 @@ def edge_id(self, u, v): """ u = F.tensor([u], dtype=F.int64) v = F.tensor([v], dtype=F.int64) - _, _, id = self._sparse.edge_ids(u, v) - return utils.toindex(id) + _, _, eid = self._sparse.edge_ids(u, v) + return utils.toindex(eid) def edge_ids(self, u, v): """Return the edge ids. @@ -282,7 +287,7 @@ def in_edges(self, v): ---------- v : utils.Index The node(s). - + Returns ------- utils.Index @@ -305,7 +310,7 @@ def out_edges(self, v): ---------- v : utils.Index The node(s). - + Returns ------- utils.Index @@ -321,14 +326,14 @@ def out_edges(self, v): src = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor()) return utils.toindex(src), utils.toindex(dst), utils.toindex(edges) - def edges(self, sorted=False): + def edges(self, return_sorted=False): """Return all the edges Parameters ---------- - sorted : bool + return_sorted : bool True if the returned edges are sorted by their src and dst ids. - + Returns ------- utils.Index @@ -340,7 +345,7 @@ def edges(self, sorted=False): """ if "all_edges" in self._cache: return self._cache["all_edges"] - src, dst, edges = self._sparse.edges(sorted) + src, dst, edges = self._sparse.edges(return_sorted) self._cache["all_edges"] = (utils.toindex(src), utils.toindex(dst), utils.toindex(edges)) return self._cache["all_edges"] @@ -440,8 +445,8 @@ def node_subgraph(self, v): The subgraph index. """ v = v.tousertensor() - gi, induced_n, induced_e = self._sparse.node_subgraph(v) - return ImmutableSubgraphIndex(gi, self, induced_n, induced_e) + gidx, induced_n, induced_e = self._sparse.node_subgraph(v) + return ImmutableSubgraphIndex(gidx, self, induced_n, induced_e) def node_subgraphs(self, vs_arr): """Return the induced node subgraphs. @@ -458,8 +463,8 @@ def node_subgraphs(self, vs_arr): """ vs_arr = [v.tousertensor() for v in vs_arr] gis, induced_nodes, induced_edges = self._sparse.node_subgraphs(vs_arr) - return [ImmutableSubgraphIndex(gi, self, induced_n, - induced_e) for gi, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)] + return [ImmutableSubgraphIndex(gidx, self, induced_n, induced_e) + for gidx, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)] def edge_subgraph(self, e): """Return the induced edge subgraph. @@ -478,6 +483,7 @@ def edge_subgraph(self, e): def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob, max_subgraph_size): + """Neighborhood sampling""" if len(seed_ids) == 0: return [] seed_ids = [v.tousertensor() for v in seed_ids] @@ -486,8 +492,8 @@ def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob, max_subgraph_size) induced_nodes = [utils.toindex(v) for v in induced_nodes] - return [ImmutableSubgraphIndex(gi, self, induced_n, - induced_e) for gi, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)] + return [ImmutableSubgraphIndex(gidx, self, induced_n, induced_e) + for gidx, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)] def adjacency_matrix(self, transpose=False, ctx=F.cpu()): """Return the adjacency matrix representation of this graph. @@ -511,12 +517,9 @@ def adjacency_matrix(self, transpose=False, ctx=F.cpu()): A index for data shuffling due to sparse format change. Return None if shuffle is not required. 
""" - def get_adj(ctx): - new_mat = self._sparse.adjacency_matrix(transpose) - return F.copy_to(new_mat, ctx) return self._sparse.adjacency_matrix(transpose, ctx), None - def incidence_matrix(self, type, ctx): + def incidence_matrix(self, typestr, ctx): """Return the incidence matrix representation of this graph. An incidence matrix is an n x m sparse matrix, where n is @@ -538,7 +541,7 @@ def incidence_matrix(self, type, ctx): Parameters ---------- - type : str + typestr : str Can be either "in", "out" or "both" ctx : context The context of returned incidence matrix. @@ -565,8 +568,8 @@ def to_networkx(self): """ src, dst, eid = self.edges() ret = nx.DiGraph() - for u, v, id in zip(src, dst, eid): - ret.add_edge(u, v, id=id) + for u, v, e in zip(src, dst, eid): + ret.add_edge(u, v, id=e) return ret def from_networkx(self, nx_graph): @@ -574,7 +577,7 @@ def from_networkx(self, nx_graph): If 'id' edge attribute exists, the edge will be added follows the edge id order. Otherwise, order is undefined. - + Parameters ---------- nx_graph : networkx.DiGraph @@ -582,7 +585,7 @@ def from_networkx(self, nx_graph): """ if not isinstance(nx_graph, nx.Graph): nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph() - else nx.DiGraph(nx_graph)) + else nx.DiGraph(nx_graph)) else: nx_graph = nx_graph.to_directed() @@ -626,8 +629,8 @@ def from_scipy_sparse_matrix(self, adj): ---------- adj : scipy sparse matrix """ - assert isinstance(adj, sp.csr_matrix) or isinstance(adj, sp.coo_matrix), \ - "The input matrix has to be a SciPy sparse matrix." + if not isinstance(adj, (sp.csr_matrix, sp.coo_matrix)): + raise DGLError("The input matrix has to be a SciPy sparse matrix.") out_mat = adj.tocoo() self._sparse.from_coo_matrix(out_mat) @@ -639,23 +642,7 @@ def from_edge_list(self, elist): elist : list List of (u, v) edge tuple. """ - self.clear() - src, dst = zip(*elist) - src = np.array(src) - dst = np.array(dst) - num_nodes = max(src.max(), dst.max()) + 1 - min_nodes = min(src.min(), dst.min()) - if min_nodes != 0: - raise DGLError('Invalid edge list. Nodes must start from 0.') - edge_ids = mx.nd.arange(0, len(src), step=1, repeat=1, dtype=np.int32) - src = mx.nd.array(src, dtype=np.int64) - dst = mx.nd.array(dst, dtype=np.int64) - # TODO we can't generate a csr_matrix with np.int64 directly. - in_csr = mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), - shape=(num_nodes, num_nodes)).astype(np.int64) - out_csr = mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), - shape=(num_nodes, num_nodes)).astype(np.int64) - self.__init__(in_csr, out_csr) + self._sparse.from_edge_list(elist) def line_graph(self, backtracking=True): """Return the line graph of this graph. @@ -778,35 +765,35 @@ def create_immutable_graph_index(graph_data=None): # If graph_data is None, we return an empty graph index. # If we can't create a graph index, we'll use the code below to handle the graph. return ImmutableGraphIndex(F.create_immutable_graph_index(graph_data)) - except: + except Exception: # pylint: disable=broad-except pass # Let's create an empty graph index first. 
- gi = ImmutableGraphIndex(F.create_immutable_graph_index()) + gidx = ImmutableGraphIndex(F.create_immutable_graph_index()) # edge list if isinstance(graph_data, (list, tuple)): try: - gi.from_edge_list(graph_data) - return gi - except: + gidx.from_edge_list(graph_data) + return gidx + except Exception: # pylint: disable=broad-except raise DGLError('Graph data is not a valid edge list.') # scipy format if isinstance(graph_data, sp.spmatrix): try: - gi.from_scipy_sparse_matrix(graph_data) - return gi - except: + gidx.from_scipy_sparse_matrix(graph_data) + return gidx + except Exception: # pylint: disable=broad-except raise DGLError('Graph data is not a valid scipy sparse matrix.') # networkx - any format try: - gi.from_networkx(graph_data) - except: + gidx.from_networkx(graph_data) + except Exception: # pylint: disable=broad-except raise DGLError('Error while creating graph from input of type "%s".' - % type(graph_data)) + % type(graph_data)) - return gi + return gidx _init_api("dgl.immutable_graph_index") diff --git a/python/dgl/init.py b/python/dgl/init.py index 3bbe4c9f5b61..554ed0d3d4be 100644 --- a/python/dgl/init.py +++ b/python/dgl/init.py @@ -5,7 +5,7 @@ __all__ = ['base_initializer', 'zero_initializer'] -def base_initializer(shape, dtype, ctx, range): +def base_initializer(shape, dtype, ctx, id_range): # pylint: disable=unused-argument """The function signature for feature initializer. Any customized feature initializer should follow this signature (see @@ -20,7 +20,7 @@ def base_initializer(shape, dtype, ctx, range): The data type of the returned features. ctx : context object The device context of the returned features. - range : slice + id_range : slice The start id and the end id of the features to be initialized. The id could be node or edge id depending on the scenario. Note that the step is always None. @@ -32,7 +32,7 @@ def base_initializer(shape, dtype, ctx, range): >>> import torch >>> import dgl - >>> def initializer(shape, dtype, ctx, range): + >>> def initializer(shape, dtype, ctx, id_range): >>> return torch.ones(shape, dtype=dtype, device=ctx) >>> g = dgl.DGLGraph() >>> g.set_n_initializer(initializer) @@ -44,7 +44,7 @@ def base_initializer(shape, dtype, ctx, range): """ raise NotImplementedError -def zero_initializer(shape, dtype, ctx, range): +def zero_initializer(shape, dtype, ctx, id_range): # pylint: disable=unused-argument """Zero feature initializer Examples diff --git a/python/dgl/propagate.py b/python/dgl/propagate.py index a963dd78e59e..3e5f291f3a41 100644 --- a/python/dgl/propagate.py +++ b/python/dgl/propagate.py @@ -56,7 +56,7 @@ def prop_edges(graph, def prop_nodes_bfs(graph, source, - reversed=False, + reverse=False, message_func='default', reduce_func='default', apply_node_func='default'): @@ -68,7 +68,7 @@ def prop_nodes_bfs(graph, The graph object. source : list, tensor of nodes Source nodes. - reversed : bool, optional + reverse : bool, optional If true, traverse following the in-edge direction. message_func : callable, optional The message function. 
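For readers following the reversed-to-reverse rename in the propagation helpers, a minimal usage sketch (assuming the PyTorch backend; the toy graph, the feature name 'h', and the builtin message/reduce functions are illustrative and not part of this patch):

import torch
import dgl
import dgl.function as fn

g = dgl.DGLGraph([(0, 1), (0, 2), (1, 3), (2, 3)])
g.ndata['h'] = torch.ones((4, 1))
# Propagate layer by layer in BFS order starting from node 0; passing
# reverse=True would traverse along in-edges instead.
dgl.prop_nodes_bfs(g, [0], reverse=False,
                   message_func=fn.copy_src(src='h', out='m'),
                   reduce_func=fn.sum(msg='m', out='h'))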
@@ -81,11 +81,11 @@ def prop_nodes_bfs(graph, -------- dgl.traversal.bfs_nodes_generator """ - nodes_gen = trv.bfs_nodes_generator(graph, source, reversed) + nodes_gen = trv.bfs_nodes_generator(graph, source, reverse) prop_nodes(graph, nodes_gen, message_func, reduce_func, apply_node_func) def prop_nodes_topo(graph, - reversed=False, + reverse=False, message_func='default', reduce_func='default', apply_node_func='default'): @@ -95,7 +95,7 @@ def prop_nodes_topo(graph, ---------- graph : DGLGraph The graph object. - reversed : bool, optional + reverse : bool, optional If true, traverse following the in-edge direction. message_func : callable, optional The message function. @@ -108,12 +108,12 @@ def prop_nodes_topo(graph, -------- dgl.traversal.topological_nodes_generator """ - nodes_gen = trv.topological_nodes_generator(graph, reversed) + nodes_gen = trv.topological_nodes_generator(graph, reverse) prop_nodes(graph, nodes_gen, message_func, reduce_func, apply_node_func) def prop_edges_dfs(graph, source, - reversed=False, + reverse=False, has_reverse_edge=False, has_nontree_edge=False, message_func='default', @@ -127,7 +127,7 @@ def prop_edges_dfs(graph, The graph object. source : list, tensor of nodes Source nodes. - reversed : bool, optional + reverse : bool, optional If true, traverse following the in-edge direction. message_func : callable, optional The message function. @@ -141,6 +141,6 @@ def prop_edges_dfs(graph, dgl.traversal.dfs_labeled_edges_generator """ edges_gen = trv.dfs_labeled_edges_generator( - graph, source, reversed, has_reverse_edge, has_nontree_edge, - return_labels=False) + graph, source, reverse, has_reverse_edge, has_nontree_edge, + return_labels=False) prop_edges(graph, edges_gen, message_func, reduce_func, apply_node_func) diff --git a/python/dgl/runtime/__init__.py b/python/dgl/runtime/__init__.py index 22948c3ffdf9..54ad2c6e5f9d 100644 --- a/python/dgl/runtime/__init__.py +++ b/python/dgl/runtime/__init__.py @@ -1,4 +1,4 @@ -"""DGL Runtime""" +"""Package for DGL scheduler and runtime.""" from __future__ import absolute_import from . import scheduler diff --git a/python/dgl/runtime/degree_bucketing.py b/python/dgl/runtime/degree_bucketing.py index cd002e0b8556..f7f39bc5483d 100644 --- a/python/dgl/runtime/degree_bucketing.py +++ b/python/dgl/runtime/degree_bucketing.py @@ -1,15 +1,14 @@ -"""Module for degree bucketing schedulers""" +"""Module for degree bucketing schedulers.""" from __future__ import absolute_import from .._ffi.function import _init_api -from ..base import is_all, ALL +from ..base import is_all from .. import backend as F -from ..immutable_graph_index import ImmutableGraphIndex -from ..udf import EdgeBatch, NodeBatch +from ..udf import NodeBatch from .. import utils from . 
import ir -from .ir import var as var +from .ir import var def gen_degree_bucketing_schedule( graph, @@ -52,23 +51,23 @@ def gen_degree_bucketing_schedule( """ buckets = _degree_bucketing_schedule(message_ids, dst_nodes, recv_nodes) # generate schedule - unique_dst, degs, buckets, msg_ids, zero_deg_nodes = buckets + _, degs, buckets, msg_ids, zero_deg_nodes = buckets # loop over each bucket idx_list = [] fd_list = [] - for deg, vb, mid in zip(degs, buckets, msg_ids): + for deg, vbkt, mid in zip(degs, buckets, msg_ids): # create per-bkt rfunc - rfunc = _create_per_bkt_rfunc(graph, reduce_udf, deg, vb) + rfunc = _create_per_bkt_rfunc(graph, reduce_udf, deg, vbkt) # vars - vb = var.IDX(vb) + vbkt = var.IDX(vbkt) mid = var.IDX(mid) rfunc = var.FUNC(rfunc) # recv on each bucket - fdvb = ir.READ_ROW(var_nf, vb) + fdvb = ir.READ_ROW(var_nf, vbkt) fdmail = ir.READ_ROW(var_mf, mid) fdvb = ir.NODE_UDF(rfunc, fdvb, fdmail, ret=fdvb) # reuse var # save for merge - idx_list.append(vb) + idx_list.append(vbkt) fd_list.append(fdvb) if zero_deg_nodes is not None: # NOTE: there must be at least one non-zero-deg node; otherwise, @@ -178,15 +177,16 @@ def _process_buckets(buckets): return v, degs, dsts, msg_ids, zero_deg_nodes -def _create_per_bkt_rfunc(graph, reduce_udf, deg, vb): +def _create_per_bkt_rfunc(graph, reduce_udf, deg, vbkt): + """Internal function to generate the per degree bucket node UDF.""" def _rfunc_wrapper(node_data, mail_data): def _reshaped_getter(key): msg = mail_data[key] - new_shape = (len(vb), deg) + F.shape(msg)[1:] + new_shape = (len(vbkt), deg) + F.shape(msg)[1:] return F.reshape(msg, new_shape) reshaped_mail_data = utils.LazyDict(_reshaped_getter, mail_data.keys()) - nb = NodeBatch(graph, vb, node_data, reshaped_mail_data) - return reduce_udf(nb) + nbatch = NodeBatch(graph, vbkt, node_data, reshaped_mail_data) + return reduce_udf(nbatch) return _rfunc_wrapper _init_api("dgl.runtime.degree_bucketing") diff --git a/python/dgl/runtime/ir/__init__.py b/python/dgl/runtime/ir/__init__.py index 72485dd2b352..d84dcfb690b3 100644 --- a/python/dgl/runtime/ir/__init__.py +++ b/python/dgl/runtime/ir/__init__.py @@ -1,2 +1,3 @@ +"""Package for DGL's internal IR.""" from .executor import * from .program import get_current_prog, prog diff --git a/python/dgl/runtime/ir/executor.py b/python/dgl/runtime/ir/executor.py index 4220d8e8d740..95587c0193c6 100644 --- a/python/dgl/runtime/ir/executor.py +++ b/python/dgl/runtime/ir/executor.py @@ -1,10 +1,11 @@ +"""Module for executors.""" +# pylint: disable=invalid-name from __future__ import absolute_import from abc import abstractmethod import functools import operator -from ...base import DGLError from ... import backend as F from ...frame import FrameRef, Frame from ... 
import utils @@ -14,7 +15,29 @@ from .var import VarType from .registry import IR_REGISTRY +__all__ = [ + 'OpCode', 'Executor', + 'NodeUDFExecutor', 'NODE_UDF', + 'EdgeUDFExecutor', 'EDGE_UDF', + 'SPMVExecutor', 'SPMV', + 'SPMVWithDataExecutor', 'SPMV_WITH_DATA', + 'ReadExecutor', 'READ', + 'ReadColExecutor', 'READ_COL', + 'ReadRowExecutor', 'READ_ROW', + 'MergeRowExecutor', 'MERGE_ROW', + 'UpdateDictExecutor', 'UPDATE_DICT', + 'NewDictExecutor', 'NEW_DICT', + 'Write_Executor', 'WRITE_', + 'WriteCol_Executor', 'WRITE_COL_', + 'WriteRow_Executor', 'WRITE_ROW_', + 'WriteDict_Executor', 'WRITE_DICT_', + 'AppendRow_Executor', 'APPEND_ROW_', + 'WriteRowInplace_Executor', 'WRITE_ROW_INPLACE_', + 'ClearFrame_Executor', 'CLEAR_FRAME_', +] + class OpCode(object): + """Opcode for all the executor types.""" # immutable op NODE_UDF = 0 EDGE_UDF = 1 @@ -37,23 +60,49 @@ class OpCode(object): CLEAR_FRAME_ = 27 class Executor(object): + """Base executor class. + + An executor is similar to a basic operator in dataflow-based framework. + The executor can be evaluated by the ``run`` function. + """ @abstractmethod def opcode(self): + """Return the opcode of this executor.""" raise NotImplementedError @abstractmethod def arg_vars(self): + """Return the argument variable list of this executor.""" raise NotImplementedError @abstractmethod def ret_var(self): + """Return the result variable of this executor.""" raise NotImplementedError @abstractmethod def run(self): + """Evaluate this executor. + + The function takes no argument and returns none, which means all the + argument and result variables must be pre-bound. + """ raise NotImplementedError class NodeUDFExecutor(Executor): + """Executor for Node UDF call. + + Parameters + ---------- + fn : var.Var + The UDF. + fdnode : var.Var + The node feature dict. + fdmail : var.Var + The mailbox data dict. + ret : var.Var + The return new node feature dict. + """ def __init__(self, fn, fdnode, fdmail, ret): self.fn = fn self.fdnode = fdnode @@ -88,13 +137,48 @@ def run(self): 'ret_type' : VarType.FEAT_DICT, 'executor_cls' : NodeUDFExecutor, } + def NODE_UDF(fn, fdnode, fdmail=None, ret=None): + """Apply the node UDF and get the new node feature symbolically. + + Parameters + ---------- + fn : var.Var + The UDF. + fdnode : var.Var + The node feature dict. + fdmail : var.Var + The mailbox data dict. + ret : var.Var, optional + The return variable for new node feature dict. If not give, + a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.NODE_UDF] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](fn, fdnode, fdmail, ret)) return ret class EdgeUDFExecutor(Executor): + """Executor for edge UDF call. + + Parameters + ---------- + fn : var.Var + The UDF. + fdsrc : var.Var + The src node feature dict. + fdedge : var.Var + The edge feature dict. + fddst : var.Var + The dst node feature dict. + ret : var.Var + The return new edge feature dict. + """ def __init__(self, fn, fdsrc, fdedge, fddst, ret): self.fn = fn self.fdsrc = fdsrc @@ -126,12 +210,46 @@ def run(self): 'executor_cls' : EdgeUDFExecutor, } def EDGE_UDF(fn, fdsrc, fdedge, fddst, ret=None): + """Apply the edge UDF and get the new edge feature symbolically. + + Parameters + ---------- + fn : var.Var + The UDF. + fdsrc : var.Var + The src node feature dict. + fdedge : var.Var + The edge feature dict. + fddst : var.Var + The dst node feature dict. 
+ ret : var.Var, optional + The return variable for new node feature dict. If not give, + a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.EDGE_UDF] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](fn, fdsrc, fdedge, fddst, ret)) return ret class ReadExecutor(Executor): + """Executor for read data from feature dict. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + col : var.Var + The column name. + ret : var.Var + The return feature tensor. + """ def __init__(self, fd, row, col, ret): self.fd = fd self.row = row @@ -159,13 +277,43 @@ def run(self): 'ret_type' : VarType.FEAT, 'executor_cls' : ReadExecutor, } + def READ(fd, row, col, ret=None): + """Read the feature data from the dictionary specified by the row and column symbolically. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + col : var.Var + The column name. + ret : var.Var, optional + The return feature tensor. If not give, a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.READ] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](fd, row, col, ret)) return ret class ReadColExecutor(Executor): + """Executor for read column data from feature dict. + + Parameters + ---------- + fd : var.Var + The feature dict. + col : var.Var + The column name. + ret : var.Var + The return feature tensor. + """ def __init__(self, fd, col, ret): self.fd = fd self.col = col @@ -191,13 +339,41 @@ def run(self): 'ret_type' : VarType.FEAT, 'executor_cls' : ReadColExecutor, } + def READ_COL(fd, col, ret=None): + """Read the column data from the dictionary. + + Parameters + ---------- + fd : var.Var + The feature dict. + col : var.Var + The column name. + ret : var.Var, optional + The return feature tensor. If not give, a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.READ_COL] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](fd, col, ret)) return ret class ReadRowExecutor(Executor): + """Executor for read row data from feature dict. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + ret : var.Var + The return feature tensor. + """ def __init__(self, fd, row, ret): self.fd = fd self.row = row @@ -223,13 +399,42 @@ def run(self): 'ret_type' : VarType.FEAT_DICT, 'executor_cls' : ReadRowExecutor, } + def READ_ROW(fd, row, ret=None): + """Read the row data from the dictionary. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + ret : var.Var, optional + The return feature tensor. If not give, a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.READ_ROW] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](fd, row, ret)) return ret class SPMVExecutor(Executor): + """Executor for sparse-matrix-dense-matrix multiply. + + Parameters + ---------- + spA : var.Var + Variable for sparse matrix lambda. The lambda returns the sparse matrix + given a context object. + B : var.Var + Variable for the dense feature tensor. + ret : var.Var + Variable for the result. 
+ """ def __init__(self, spA, B, ret): self.spA = spA self.B = B @@ -258,8 +463,7 @@ def run(self): # Flatten the dim 1:~ B_shape = F.shape(B) feat_shape = B_shape[1:] - tmp_B_shape = (B_shape[0], - functools.reduce(operator.mul, feat_shape, 1)) + tmp_B_shape = (B_shape[0], functools.reduce(operator.mul, feat_shape, 1)) B = F.reshape(B, tmp_B_shape) C = F.spmm(spA, B) C_shape = (F.shape(C)[0],) + feat_shape @@ -274,13 +478,45 @@ def run(self): 'ret_type' : VarType.FEAT, 'executor_cls' : SPMVExecutor, } + def SPMV(spA, B, ret=None): + """Perform sparse-matrix-dense-matrix multiply symbolically. + + Parameters + ---------- + spA : var.Var + Variable for sparse matrix lambda. The lambda returns the sparse matrix + given a context object. + B : var.Var + Variable for the dense feature tensor. + ret : var.Var, optional + Variable for the result. If not give, a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.SPMV] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](spA, B, ret)) return ret class SPMVWithDataExecutor(Executor): + """Executor for sparse-matrix-dense-matrix multiply with provided sparse data. + + Parameters + ---------- + spA : var.Var + Variable for sparse matrix lambda. The lambda returns the sparse matrix + given a context object. + A_data : var.Var + Variable for the sparse matrix data. + B : var.Var + Variable for the dense feature tensor. + ret : var.Var + Variable for the result. + """ def __init__(self, spA, A_data, B, ret): self.spA = spA self.A_data = A_data @@ -320,8 +556,7 @@ def run(self): # Flatten the dim 1:~ B_shape = F.shape(B) feat_shape = B_shape[1:] - tmp_B_shape = (B_shape[0], - functools.reduce(operator.mul, feat_shape, 1)) + tmp_B_shape = (B_shape[0], functools.reduce(operator.mul, feat_shape, 1)) B = F.reshape(B, tmp_B_shape) C = F.spmm(spA, B) C_shape = (F.shape(C)[0],) + feat_shape @@ -336,13 +571,44 @@ def run(self): 'ret_type' : VarType.FEAT, 'executor_cls' : SPMVWithDataExecutor, } + def SPMV_WITH_DATA(spA, A_data, B, ret=None): + """Perform sparse-matrix-dense-matrix multiply with sparse data symbolically. + + Parameters + ---------- + spA : var.Var + Variable for sparse matrix lambda. The lambda returns the sparse matrix + given a context object. + A_data : var.Var + Variable for the sparse matrix data. + B : var.Var + Variable for the dense feature tensor. + ret : var.Var, optional + Variable for the result. If not give, a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.SPMV_WITH_DATA] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](spA, A_data, B, ret)) return ret class MergeRowExecutor(Executor): + """Executor for merge row data according to the given order. + + Parameters + ---------- + order : var.Var + The order index. + fd_list : list of var.Var + The list of row data variables. Each represents a feature dict. + ret : var.Var + Variable for the result. + """ def __init__(self, order, fd_list, ret): self.order = order self.fd_list = fd_list @@ -373,13 +639,43 @@ def run(self): 'ret_type' : VarType.FEAT_DICT, 'executor_cls' : MergeRowExecutor, } + def MERGE_ROW(idx_list, fd_list, ret=None): + """Merge row data according to the given order symbolically. + + Parameters + ---------- + order : var.Var + The order index. + fd_list : list of var.Var + The list of row data variables. 
Each represents a feature dict. + ret : var.Var, optional + Variable for the result. If not give, a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.MERGE_ROW] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](idx_list, fd_list, ret)) return ret class UpdateDictExecutor(Executor): + """Executor for update feature dictionary with another one. + + Similar to python dict's update but return a new dictionary. + + Parameters + ---------- + fd1 : var.Var + Variable for the feature dict to be updated. + fd2 : var.Var + Variable for the provided feature dict. + ret : var.Var + Variable for the result. + """ def __init__(self, fd1, fd2, ret): self.fd1 = fd1 self.fd2 = fd2 @@ -398,7 +694,7 @@ def run(self): fd1_data = self.fd1.data fd2_data = self.fd2.data if (isinstance(fd1_data, utils.LazyDict) - or isinstance(fd2_data, utils.LazyDict)): + or isinstance(fd2_data, utils.LazyDict)): # NOTE: fd2 has higher priority ret_data = utils.HybridDict(fd2_data, fd1_data) else: @@ -412,13 +708,45 @@ def run(self): 'ret_type' : VarType.FEAT_DICT, 'executor_cls' : UpdateDictExecutor, } + def UPDATE_DICT(fd1, fd2, ret=None): + """Executor for update feature dictionary with another one. + + Similar to python dict's update but return a new dictionary. + + Parameters + ---------- + fd1 : var.Var + Variable for the feature dict to be updated. + fd2 : var.Var + Variable for the provided feature dict. + ret : var.Var, optional + Variable for the result. If not give, a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.UPDATE_DICT] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](fd1, fd2, ret)) return ret class NewDictExecutor(Executor): + """Executor for creating new feature dictionary. + + Parameters + ---------- + fd_init : var.Var + The feat dict to borrow initializer. + idx : var.Var + The index to look for number or rows. + fd_scheme : var.Var + The feat dict to look for column scheme. + ret : var.Var + Variable for the result. + """ def __init__(self, fd_init, idx, fd_scheme, ret): self.fd_init = fd_init # the feat dict to borrow initializer self.idx = idx # the index to look for number or rows @@ -455,13 +783,45 @@ def run(self): 'ret_type' : VarType.FEAT_DICT, 'executor_cls' : NewDictExecutor, } + def NEW_DICT(fd_init, idx, fd_scheme, ret=None): + """Create a new dictionary symbolically. + + Parameters + ---------- + fd_init : var.Var + The feat dict to borrow initializer. + idx : var.Var + The index to look for number or rows. + fd_scheme : var.Var + The feat dict to look for column scheme. + ret : var.Var + Variable for the result. If not give, a new variable will be created. + + Returns + ------- + var.Var + Variable for the result. + """ reg = IR_REGISTRY[OpCode.NEW_DICT] ret = var.new(reg['ret_type']) if ret is None else ret get_current_prog().issue(reg['executor_cls'](fd_init, idx, fd_scheme, ret)) return ret class Write_Executor(Executor): + """Executor for writing the given data to the feature dict. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + col : var.Var + The column name. + val : var.Var + The given feature data. 
+ """ def __init__(self, fd, row, col, val): self.fd = fd self.row = row @@ -490,11 +850,36 @@ def run(self): 'ret_type' : None, 'executor_cls' : Write_Executor, } + def WRITE_(fd, row, col, val): + """Write the given data to the feature dict symbolically. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + col : var.Var + The column name. + val : var.Var + The given feature data. + """ reg = IR_REGISTRY[OpCode.WRITE_] get_current_prog().issue(reg['executor_cls'](fd, row, col, val)) class WriteCol_Executor(Executor): + """Executor for writing the given column data to the feature dict. + + Parameters + ---------- + fd : var.Var + The feature dict. + col : var.Var + The column name. + val : var.Var + The given feature data. + """ def __init__(self, fd, col, val): self.fd = fd self.col = col @@ -521,11 +906,34 @@ def run(self): 'ret_type' : None, 'executor_cls' : WriteCol_Executor, } + def WRITE_COL_(fd, col, val): + """Writing the given column data to the feature dict symbolically. + + Parameters + ---------- + fd : var.Var + The feature dict. + col : var.Var + The column name. + val : var.Var + The given feature data. + """ reg = IR_REGISTRY[OpCode.WRITE_COL_] get_current_prog().issue(reg['executor_cls'](fd, col, val)) class WriteRow_Executor(Executor): + """Executor for writing the given row data to the feature dict. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + val : var.Var + The given feature data. + """ def __init__(self, fd, row, val): self.fd = fd self.row = row @@ -552,11 +960,34 @@ def run(self): 'ret_type' : None, 'executor_cls' : WriteRow_Executor, } + def WRITE_ROW_(fd, row, val): + """Write the given row data to the feature dict symbolically. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + val : var.Var + The given feature data. + """ reg = IR_REGISTRY[OpCode.WRITE_ROW_] get_current_prog().issue(reg['executor_cls'](fd, row, val)) class WriteRowInplace_Executor(Executor): + """Executor for writing the given row data to the feature dict in-place. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + val : var.Var + The given feature data. + """ def __init__(self, fd, row, val): self.fd = fd self.row = row @@ -575,7 +1006,7 @@ def run(self): fd_data = self.fd.data # feature dict row_data = self.row.data # idx val_data = self.val.data - fd_data.set_item_inplace(row_data, val_data, inplace=True) + fd_data.update_data(row_data, val_data, inplace=True) IR_REGISTRY[OpCode.WRITE_ROW_INPLACE_] = { 'name' : 'WRITE_ROW_INPLACE_', @@ -585,10 +1016,30 @@ def run(self): } def WRITE_ROW_INPLACE_(fd, row, val): + """Write the given row data to the feature dict in-place symbolically. + + Parameters + ---------- + fd : var.Var + The feature dict. + row : var.Var + The row index. + val : var.Var + The given feature data. + """ reg = IR_REGISTRY[OpCode.WRITE_ROW_INPLACE_] get_current_prog().issue(reg['executor_cls'](fd, row, val)) class WriteDict_Executor(Executor): + """Executor for writing the given feature dict data into the another one. + + Parameters + ---------- + fd1 : var.Var + The feature dict to be mutated. + fd2 : var.Var + The feature dict data. 
+ """ def __init__(self, fd1, fd2): self.fd1 = fd1 self.fd2 = fd2 @@ -614,11 +1065,30 @@ def run(self): 'ret_type' : None, 'executor_cls' : WriteDict_Executor, } + def WRITE_DICT_(fd1, fd2): + """Writing the given feature dict data into the another one symbolically. + + Parameters + ---------- + fd1 : var.Var + The feature dict to be mutated. + fd2 : var.Var + The feature dict data. + """ reg = IR_REGISTRY[OpCode.WRITE_DICT_] get_current_prog().issue(reg['executor_cls'](fd1, fd2)) class AppendRow_Executor(Executor): + """Executor for appending one feature dict to another. + + Parameters + ---------- + fd1 : var.Var + The feature dict in the front. + fd2 : var.Var + The feature dict in the back. + """ def __init__(self, fd1, fd2): self.fd1 = fd1 self.fd2 = fd2 @@ -644,10 +1114,26 @@ def run(self): 'executor_cls' : AppendRow_Executor, } def APPEND_ROW_(fd1, fd2): + """Append one feature dict to another symbolically. + + Parameters + ---------- + fd1 : var.Var + The feature dict in the front. + fd2 : var.Var + The feature dict in the back. + """ reg = IR_REGISTRY[OpCode.APPEND_ROW_] get_current_prog().issue(reg['executor_cls'](fd1, fd2)) class ClearFrame_Executor(Executor): + """Executor for clear the feature dict. + + Parameters + ---------- + fd : var.Var + The feature dict to be cleared. + """ def __init__(self, fd): self.fd = fd @@ -672,6 +1158,14 @@ def run(self): 'ret_type': None, 'executor_cls': ClearFrame_Executor, } + def CLEAR_FRAME_(fd): + """Clear the feature dict symbolically. + + Parameters + ---------- + fd : var.Var + The feature dict to be cleared. + """ reg = IR_REGISTRY[OpCode.CLEAR_FRAME_] get_current_prog().issue(reg['executor_cls'](fd)) diff --git a/python/dgl/runtime/ir/program.py b/python/dgl/runtime/ir/program.py index d2824a1256f5..fa17937fb050 100644 --- a/python/dgl/runtime/ir/program.py +++ b/python/dgl/runtime/ir/program.py @@ -1,3 +1,4 @@ +"""Module for program.""" from __future__ import absolute_import from contextlib import contextmanager @@ -5,15 +6,26 @@ from .registry import IR_REGISTRY class Prog(object): - """The program.""" + """The program. + + A program is simply a list of executors. + """ def __init__(self): self.execs = [] self.varcount = 0 def issue(self, exe): + """Issue an executor to this program. + + Parameters + ---------- + exe : Executor + The executor. 
+ """ self.execs.append(exe) def pprint_exe(self, exe): + """Internal function to pretty-print the executor.""" argstr = ', '.join([str(av) for av in exe.arg_vars()]) if exe.ret_var() is None: # stmt @@ -28,21 +40,26 @@ def pprint_exe(self, exe): argstr)) def pprint(self): + """Pretty-print the program.""" for exe in self.execs: self.pprint_exe(exe) -_current_prog = None +# current program +CURRENT_PROG = None def get_current_prog(): - global _current_prog - return _current_prog + """Get the current program.""" + global CURRENT_PROG + return CURRENT_PROG -def set_current_prog(prog): - global _current_prog - _current_prog = prog +def set_current_prog(program): + """Set the current program.""" + global CURRENT_PROG + CURRENT_PROG = program @contextmanager def prog(): + """A context manager to create a new program.""" set_current_prog(Prog()) yield get_current_prog() set_current_prog(None) diff --git a/python/dgl/runtime/ir/var.py b/python/dgl/runtime/ir/var.py index e28eea1fc10e..4f39f8c8cdcc 100644 --- a/python/dgl/runtime/ir/var.py +++ b/python/dgl/runtime/ir/var.py @@ -1,8 +1,11 @@ +"""Module for variables.""" +# pylint: disable=invalid-name from __future__ import absolute_import from .program import get_current_prog class VarType(object): + """Variable types.""" # Types for symbolic objects (i.e, they might not be # concretized before evaluation. FEAT = 0 @@ -23,47 +26,65 @@ class VarType(object): ] class Var(object): - """Variable + """Class for variables in IR. + + Variables represent data in the IR. A variable can contain concrete values. + Otherwise, it can act as a "symbol", whose values are not materialized at the + moment, but later. + + Parameters + ---------- name : str + The variable name. type : int + The type code. data : any, default=None (not concretized) + The data. 
""" - __slots__ = ['name', 'type', 'data'] - def __init__(self, name, type, data): + __slots__ = ['name', 'typecode', 'data'] + def __init__(self, name, typecode, data): self.name = name - self.type = type + self.typecode = typecode self.data = data def __str__(self): - if self.type == VarType.STR: + if self.typecode == VarType.STR: return '"%s"' % self.data else: return self.name def typestr(self): - return VAR_TYPE_NAME_MAP[self.type] + """Return the type string of this variable.""" + return VAR_TYPE_NAME_MAP[self.typecode] -def new(type, data=None, name=None): +def new(typecode, data=None, name=None): + """Create a new variable.""" if name is None: cur_prog = get_current_prog() name = '_z%d' % cur_prog.varcount cur_prog.varcount += 1 - return Var(name, type, data) + return Var(name, typecode, data) def FEAT(data=None, name=None): + """Create a variable for feature tensor.""" return new(VarType.FEAT, data, name) def FEAT_DICT(data=None, name=None): + """Create a variable for feature dict.""" return new(VarType.FEAT_DICT, data, name) def SPMAT(data=None, name=None): + """Create a variable for sparse matrix lambda.""" return new(VarType.SPMAT, data, name) def IDX(data=None, name=None): + """Create a variable for index.""" return new(VarType.IDX, data, name) def STR(data=None, name=None): + """Create a variable for string value.""" return new(VarType.STR, data, name) def FUNC(data=None, name=None): + """Create a variable for function.""" return new(VarType.FUNC, data, name) diff --git a/python/dgl/runtime/runtime.py b/python/dgl/runtime/runtime.py index e0e26fbf20b2..a4658deb2d41 100644 --- a/python/dgl/runtime/runtime.py +++ b/python/dgl/runtime/runtime.py @@ -1,8 +1,10 @@ """DGL mini-runtime.""" class Runtime(object): + """The mini runtime class.""" @staticmethod def run(prog): + """Run the given program.""" for exe in prog.execs: #prog.pprint_exe(exe) exe.run() diff --git a/python/dgl/runtime/scheduler.py b/python/dgl/runtime/scheduler.py index b7f6a00ad374..e16d2de993d2 100644 --- a/python/dgl/runtime/scheduler.py +++ b/python/dgl/runtime/scheduler.py @@ -3,27 +3,27 @@ from .. import utils from .._ffi.function import _init_api -from ..base import ALL, DGLError, is_all +from ..base import DGLError from .. import backend as F from ..frame import frame_like, FrameRef from ..function.base import BuiltinFunction, BundledFunction from ..udf import EdgeBatch, NodeBatch from . import ir -from .ir import var as var +from .ir import var from . import degree_bucketing as db from . 
import spmv __all__ = [ - "schedule_send", - "schedule_recv", - "schedule_update_all", - "schedule_snr", - "schedule_apply_nodes", - "schedule_apply_edges", - "schedule_push", - "schedule_pull" - ] + "schedule_send", + "schedule_recv", + "schedule_update_all", + "schedule_snr", + "schedule_apply_nodes", + "schedule_apply_edges", + "schedule_push", + "schedule_pull" +] def schedule_send(graph, u, v, eid, message_func): """get send schedule @@ -132,7 +132,6 @@ def schedule_snr(graph, inplace: bool If True, the update will be done in place """ - call_type = 'send_and_recv' u, v, eid = edge_tuples recv_nodes, _ = F.sort_1d(F.unique(v.tousertensor())) recv_nodes = utils.toindex(recv_nodes) @@ -143,13 +142,12 @@ def schedule_snr(graph, var_eid = var.IDX(eid) var_recv_nodes = var.IDX(recv_nodes, name='recv_nodes') # generate send and reduce schedule - uv_getter = lambda : (var_u, var_v) - adj_creator = lambda : spmv.build_adj_matrix_uv(graph, (u, v), recv_nodes) - inc_creator = lambda : spmv.build_inc_matrix_dst(v, recv_nodes) - reduced_feat = _gen_send_reduce( - graph, message_func, reduce_func, - var_eid, var_recv_nodes, - uv_getter, adj_creator, inc_creator) + uv_getter = lambda: (var_u, var_v) + adj_creator = lambda: spmv.build_adj_matrix_uv(graph, (u, v), recv_nodes) + inc_creator = lambda: spmv.build_inc_matrix_dst(v, recv_nodes) + reduced_feat = _gen_send_reduce(graph, message_func, reduce_func, + var_eid, var_recv_nodes, + uv_getter, adj_creator, inc_creator) # generate apply schedule final_feat = _apply_with_accum(graph, var_recv_nodes, var_nf, reduced_feat, apply_func) if inplace: @@ -180,7 +178,6 @@ def schedule_update_all(graph, nodes = utils.toindex(slice(0, graph.number_of_nodes())) schedule_apply_nodes(graph, nodes, apply_func, inplace=False) else: - call_type = 'update_all' eid = utils.toindex(slice(0, graph.number_of_edges())) # shortcut for ALL recv_nodes = utils.toindex(slice(0, graph.number_of_nodes())) # shortcut for ALL # create vars @@ -191,12 +188,11 @@ def schedule_update_all(graph, def uv_getter(): src, dst, _ = graph._graph.edges() return var.IDX(src), var.IDX(dst) - adj_creator = lambda : spmv.build_adj_matrix_graph(graph) - inc_creator = lambda : spmv.build_inc_matrix_graph(graph) - reduced_feat = _gen_send_reduce( - graph, message_func, reduce_func, - var_eid, var_recv_nodes, - uv_getter, adj_creator, inc_creator) + adj_creator = lambda: spmv.build_adj_matrix_graph(graph) + inc_creator = lambda: spmv.build_inc_matrix_graph(graph) + reduced_feat = _gen_send_reduce(graph, message_func, reduce_func, + var_eid, var_recv_nodes, + uv_getter, adj_creator, inc_creator) # generate optional apply final_feat = _apply_with_accum(graph, var_recv_nodes, var_nf, reduced_feat, apply_func) ir.WRITE_DICT_(var_nf, final_feat) @@ -226,8 +222,8 @@ def schedule_apply_nodes(graph, var_v = var.IDX(v) v_nf = ir.READ_ROW(var_nf, var_v) def _afunc_wrapper(node_data): - nb = NodeBatch(graph, v, node_data) - return apply_func(nb) + nbatch = NodeBatch(graph, v, node_data) + return apply_func(nbatch) afunc = var.FUNC(_afunc_wrapper) applied_feat = ir.NODE_UDF(afunc, v_nf) if inplace: @@ -271,9 +267,8 @@ def schedule_apply_edges(graph, fddst = ir.READ_ROW(var_nf, var_v) fdedge = ir.READ_ROW(var_ef, var_eid) def _efunc_wrapper(src_data, edge_data, dst_data): - eb = EdgeBatch(graph, (u, v, eid), - src_data, edge_data, dst_data) - return apply_func(eb) + ebatch = EdgeBatch(graph, (u, v, eid), src_data, edge_data, dst_data) + return apply_func(ebatch) _efunc = var.FUNC(_efunc_wrapper) new_fdedge = 
ir.EDGE_UDF(_efunc, fdsrc, fdedge, fddst) if inplace: @@ -343,7 +338,6 @@ def schedule_pull(graph, if apply_func is not None: schedule_apply_nodes(graph, pull_nodes, apply_func, inplace) else: - call_type = 'send_and_recv' pull_nodes, _ = F.sort_1d(F.unique(pull_nodes.tousertensor())) pull_nodes = utils.toindex(pull_nodes) # create vars @@ -353,13 +347,12 @@ def schedule_pull(graph, var_v = var.IDX(v) var_eid = var.IDX(eid) # generate send and reduce schedule - uv_getter = lambda : (var_u, var_v) - adj_creator = lambda : spmv.build_adj_matrix_uv(graph, (u, v), pull_nodes) - inc_creator = lambda : spmv.build_inc_matrix_dst(v, pull_nodes) - reduced_feat = _gen_send_reduce( - graph, message_func, reduce_func, - var_eid, var_pull_nodes, - uv_getter, adj_creator, inc_creator) + uv_getter = lambda: (var_u, var_v) + adj_creator = lambda: spmv.build_adj_matrix_uv(graph, (u, v), pull_nodes) + inc_creator = lambda: spmv.build_inc_matrix_dst(v, pull_nodes) + reduced_feat = _gen_send_reduce(graph, message_func, reduce_func, + var_eid, var_pull_nodes, + uv_getter, adj_creator, inc_creator) # generate optional apply final_feat = _apply_with_accum(graph, var_pull_nodes, var_nf, reduced_feat, apply_func) if inplace: @@ -423,8 +416,8 @@ def _apply_with_accum(graph, var_nodes, var_nf, var_accum, apply_func): v_nf = ir.READ_ROW(var_nf, var_nodes) v_nf = ir.UPDATE_DICT(v_nf, var_accum) def _afunc_wrapper(node_data): - nb = NodeBatch(graph, var_nodes.data, node_data) - return apply_func(nb) + nbatch = NodeBatch(graph, var_nodes.data, node_data) + return apply_func(nbatch) afunc = var.FUNC(_afunc_wrapper) applied_feat = ir.NODE_UDF(afunc, v_nf) final_feat = ir.UPDATE_DICT(var_accum, applied_feat) @@ -439,7 +432,6 @@ def _gen_reduce(graph, reduce_func, edge_tuples, recv_nodes): edge_tuples : tuple of utils.Index recv_nodes : utils.Index """ - call_type = "recv" _, dst, eid = edge_tuples rfunc = _standardize_func_usage(reduce_func, 'reduce') rfunc_is_list = utils.is_iterable(rfunc) @@ -451,9 +443,9 @@ def _gen_reduce(graph, reduce_func, edge_tuples, recv_nodes): tmpframe = FrameRef(frame_like(graph._node_frame._frame, len(recv_nodes))) # vars - msg = var.FEAT_DICT(graph._msg_frame, 'msg') - nf = var.FEAT_DICT(graph._node_frame, 'nf') - out = var.FEAT_DICT(data=tmpframe) + var_msg = var.FEAT_DICT(graph._msg_frame, 'msg') + var_nf = var.FEAT_DICT(graph._node_frame, 'nf') + var_out = var.FEAT_DICT(data=tmpframe) if rfunc_is_list: # UDF message + builtin reducer @@ -461,19 +453,19 @@ def _gen_reduce(graph, reduce_func, edge_tuples, recv_nodes): spmv_rfunc, rfunc = spmv.analyze_e2v_spmv(graph, rfunc) inc = spmv.build_inc_matrix_eid(graph._msg_frame.num_rows, eid, dst, recv_nodes) - spmv.gen_e2v_spmv_schedule(inc, spmv_rfunc, msg, out) + spmv.gen_e2v_spmv_schedule(inc, spmv_rfunc, var_msg, var_out) if len(rfunc) == 0: # All mfunc and rfunc has been processed. 
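# A small concrete illustration of the degree-bucketing path that follows
# (the values are hypothetical): if nodes v0 and v2 each receive one message
# while v1 receives three, _degree_bucketing_schedule forms two buckets, and
# _create_per_bkt_rfunc reshapes each bucket's mailbox to (2, 1) + feat_shape
# and (1, 3) + feat_shape respectively before calling the reduce UDF once per
# bucket.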
- return out + return var_out # convert the remaining rfunc to UDFs rfunc = BundledFunction(rfunc) # gen degree bucketing schedule for UDF recv db.gen_degree_bucketing_schedule(graph, rfunc, eid, dst, - recv_nodes, nf, msg, out) - return out + recv_nodes, var_nf, var_msg, var_out) + return var_out def _gen_send_reduce( graph, @@ -573,19 +565,19 @@ def _gen_send_reduce( # gen degree bucketing schedule for UDF recv mid = utils.toindex(slice(0, len(var_v.data))) # message id is from 0~|dst| - db.gen_degree_bucketing_schedule(graph, rfunc, - mid, var_v.data, reduce_nodes, - var_nf, var_mf, var_out) + db.gen_degree_bucketing_schedule( + graph, rfunc, mid, var_v.data, reduce_nodes, var_nf, var_mf, var_out) return var_out -def _gen_send(graph, nf, ef, u, v, eid, mfunc): - fdsrc = ir.READ_ROW(nf, u) - fddst = ir.READ_ROW(nf, v) - fdedge = ir.READ_ROW(ef, eid) +def _gen_send(graph, nfr, efr, u, v, eid, mfunc): + """Internal function to generate send schedule.""" + fdsrc = ir.READ_ROW(nfr, u) + fddst = ir.READ_ROW(nfr, v) + fdedge = ir.READ_ROW(efr, eid) def _mfunc_wrapper(src_data, edge_data, dst_data): - eb = EdgeBatch(graph, (u.data, v.data, eid.data), - src_data, edge_data, dst_data) - return mfunc(eb) + ebatch = EdgeBatch(graph, (u.data, v.data, eid.data), + src_data, edge_data, dst_data) + return mfunc(ebatch) _mfunc_wrapper = var.FUNC(_mfunc_wrapper) msg = ir.EDGE_UDF(_mfunc_wrapper, fdsrc, fdedge, fddst) return msg diff --git a/python/dgl/runtime/spmv.py b/python/dgl/runtime/spmv.py index 4a6c6bffa30f..dfc36cccf67b 100644 --- a/python/dgl/runtime/spmv.py +++ b/python/dgl/runtime/spmv.py @@ -6,7 +6,7 @@ from .. import utils from . import ir -from .ir import var as var +from .ir import var def analyze_v2v_spmv(graph, mfunc, rfunc): """Analyze if SPMV from node space to node space can be applied. @@ -54,7 +54,7 @@ def analyze_v2v_spmv(graph, mfunc, rfunc): return spmv_pairs, mfunc_left, rfunc_left -def analyze_e2v_spmv(graph, rfunc): +def analyze_e2v_spmv(graph, rfunc): # pylint: disable=unused-argument """Analyze if SPMV from edge space to node space can be applied. Parameters @@ -80,16 +80,16 @@ def analyze_e2v_spmv(graph, rfunc): rfunc_left.append(rfn) return spmv_rfunc, rfunc_left -def gen_v2v_spmv_schedule(adj, spmv_pairs, nf, ef, eid, out): +def gen_v2v_spmv_schedule(adj, spmv_pairs, nft, eft, eid, out): """Generate v2v spmv schedule. Parameters ---------- adj : tuple (sparse matrix, utils.Index) spmv_pairs : list of pair - nf : var.Var + nft : var.Var input node features - ef : var.Var + eft : var.Var input edge features eid : var.Var eid index @@ -103,16 +103,16 @@ def gen_v2v_spmv_schedule(adj, spmv_pairs, nf, ef, eid, out): eid = var.IDX(new_eid) for mfn, rfn in spmv_pairs: if mfn.use_edge_feature: - ftedge = ir.READ(ef, eid, var.STR(mfn.edge_field)) - ftsrc = ir.READ_COL(nf, var.STR(mfn.src_field)) + ftedge = ir.READ(eft, eid, var.STR(mfn.edge_field)) + ftsrc = ir.READ_COL(nft, var.STR(mfn.src_field)) ftdst = ir.SPMV_WITH_DATA(adj_var, ftedge, ftsrc) else: - ftsrc = ir.READ_COL(nf, var.STR(mfn.src_field)) + ftsrc = ir.READ_COL(nft, var.STR(mfn.src_field)) ftdst = ir.SPMV(adj_var, ftsrc) # save for merge ir.WRITE_COL_(out, var.STR(rfn.out_field), ftdst) -def gen_e2v_spmv_schedule(inc, spmv_rfunc, mf, out): +def gen_e2v_spmv_schedule(inc, spmv_rfunc, mfr, out): """Generate e2v SPMV schedule. 
Parameters @@ -127,7 +127,7 @@ def gen_e2v_spmv_schedule(inc, spmv_rfunc, mf, out): incmat, _ = inc inc_var = var.SPMAT(incmat) for rfn in spmv_rfunc: - ftmsg = ir.READ_COL(mf, var.STR(rfn.msg_field)) + ftmsg = ir.READ_COL(mfr, var.STR(rfn.msg_field)) ftdst = ir.SPMV(inc_var, ftmsg) ir.WRITE_COL_(out, var.STR(rfn.out_field), ftdst) @@ -147,9 +147,9 @@ def build_adj_matrix_graph(graph): A index for data shuffling due to sparse format change. Return None if shuffle is not required. """ - gi = graph._graph - _, shuffle_idx = gi.adjacency_matrix(False, F.cpu()) - return lambda ctx : gi.adjacency_matrix(False, ctx)[0], shuffle_idx + gidx = graph._graph + _, shuffle_idx = gidx.adjacency_matrix(False, F.cpu()) + return lambda ctx: gidx.adjacency_matrix(False, ctx)[0], shuffle_idx def _build_adj_matrix_index_uv(graph, edges, reduce_nodes): """Build adj matrix index and shape using the given (u, v) edges. @@ -180,7 +180,7 @@ def _build_adj_matrix_index_uv(graph, edges, reduce_nodes): The dense shape. """ # TODO(minjie): add node frontier for this - new2old, old2new = utils.build_relabel_map(reduce_nodes, sorted=True) + _, old2new = utils.build_relabel_map(reduce_nodes, is_sorted=True) u, v = edges u = u.tousertensor() v = v.tousertensor() @@ -218,13 +218,13 @@ def build_adj_matrix_uv(graph, edges, reduce_nodes): if shuffle is not required. """ sp_idx, shape = _build_adj_matrix_index_uv(graph, edges, reduce_nodes) - u, v = edges + u, _ = edges nnz = len(u) # FIXME(minjie): data type dat = F.ones((nnz,), dtype=F.float32, ctx=F.cpu()) mat, shuffle_idx = F.sparse_matrix(dat, sp_idx, shape) shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None - return utils.CtxCachedObject(lambda ctx : F.copy_to(mat, ctx)), shuffle_idx + return utils.CtxCachedObject(lambda ctx: F.copy_to(mat, ctx)), shuffle_idx def build_inc_matrix_graph(graph): """Build incidence matrix. @@ -242,16 +242,16 @@ def build_inc_matrix_graph(graph): A index for data shuffling due to sparse format change. Return None if shuffle is not required. """ - gi = graph._graph + gidx = graph._graph # inc mat will not use data tensor so conversion index is not needed - return lambda ctx : gi.incidence_matrix('in', ctx)[0], None + return lambda ctx: gidx.incidence_matrix('in', ctx)[0], None def build_inc_matrix_eid(m, eid, dst, reduce_nodes): """Build incidence matrix using edge id and edge dst nodes. The incidence matrix is of shape (n, m), where n=len(reduce_nodes). The nnz is equal to len(eid). - + Invariant: len(eid) == len(dst) The dst nodes will be sorted in the *unique-ascending* order of @@ -296,7 +296,7 @@ def build_inc_matrix_eid(m, eid, dst, reduce_nodes): A index for data shuffling due to sparse format change. Return None if shuffle is not required. """ - new2old, old2new = utils.build_relabel_map(reduce_nodes, sorted=True) + _, old2new = utils.build_relabel_map(reduce_nodes, is_sorted=True) dst = dst.tousertensor() eid = eid.tousertensor() # relabel edges dsts @@ -311,7 +311,7 @@ def build_inc_matrix_eid(m, eid, dst, reduce_nodes): dat = F.ones((nnz,), dtype=F.float32, ctx=F.cpu()) mat, _ = F.sparse_matrix(dat, ('coo', idx), (n, m)) # inc mat will not use data tensor so conversion index is not needed - return utils.CtxCachedObject(lambda ctx : F.copy_to(mat, ctx)), None + return utils.CtxCachedObject(lambda ctx: F.copy_to(mat, ctx)), None def build_inc_matrix_dst(dst, reduce_nodes): """Build incidence matrix using only edge destinations. 
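A minimal numerical sketch (NumPy only; the toy sizes and values are hypothetical) of why the (n, m) incidence matrices built in this module let sum-style reducers run as a single SPMV: row i selects exactly the messages whose destination is reduce node i.

import numpy as np

msgs = np.array([[1.0], [2.0], [4.0]])   # m = 3 messages, feature dim 1
dst = np.array([0, 2, 2])                # message k is sent to node dst[k]
n = 3                                    # number of reduce nodes
inc = np.zeros((n, len(dst)))
inc[dst, np.arange(len(dst))] = 1.0      # same nonzero pattern as build_inc_matrix_eid
print((inc @ msgs).ravel())              # [1. 0. 6.] -- node 2 received 2.0 + 4.0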
@@ -332,7 +332,7 @@ def build_inc_matrix_dst(dst, reduce_nodes): [0, 0, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 1]], shape=(5, 5)) - + Parameters ---------- dst : utils.Index diff --git a/python/dgl/subgraph.py b/python/dgl/subgraph.py index adddf3d96059..9425f0fdd338 100644 --- a/python/dgl/subgraph.py +++ b/python/dgl/subgraph.py @@ -1,9 +1,6 @@ """Class for subgraph data structure.""" from __future__ import absolute_import -import networkx as nx - -from . import backend as F from .frame import Frame, FrameRef from .graph import DGLGraph from . import utils @@ -47,22 +44,24 @@ class DGLSubGraph(DGLGraph): def __init__(self, parent, parent_nid, parent_eid, graph_idx, shared=False): super(DGLSubGraph, self).__init__(graph_data=graph_idx, readonly=graph_idx.is_readonly()) + if shared: + raise DGLError('Shared mode is not yet supported.') self._parent = parent self._parent_nid = parent_nid self._parent_eid = parent_eid # override APIs - def add_nodes(self, num, reprs=None): + def add_nodes(self, num, data=None): """Add nodes. Disabled because BatchedDGLGraph is read-only.""" - raise RuntimeError('Readonly graph. Mutation is not allowed.') + raise DGLError('Readonly graph. Mutation is not allowed.') - def add_edge(self, u, v, reprs=None): + def add_edge(self, u, v, data=None): """Add one edge. Disabled because BatchedDGLGraph is read-only.""" - raise RuntimeError('Readonly graph. Mutation is not allowed.') + raise DGLError('Readonly graph. Mutation is not allowed.') - def add_edges(self, u, v, reprs=None): + def add_edges(self, u, v, data=None): """Add many edges. Disabled because BatchedDGLGraph is read-only.""" - raise RuntimeError('Readonly graph. Mutation is not allowed.') + raise DGLError('Readonly graph. Mutation is not allowed.') @property def parent_nid(self): @@ -110,10 +109,10 @@ def copy_to_parent(self, inplace=False): If true, use inplace write (no gradient but faster) """ self._parent._node_frame.update_rows( - self._parent_nid, self._node_frame, inplace=inplace) + self._parent_nid, self._node_frame, inplace=inplace) if self._parent._edge_frame.num_rows != 0: self._parent._edge_frame.update_rows( - self._get_parent_eid(), self._edge_frame, inplace=inplace) + self._get_parent_eid(), self._edge_frame, inplace=inplace) def copy_from_parent(self): """Copy node/edge features from the parent graph. diff --git a/python/dgl/traversal.py b/python/dgl/traversal.py index 5af41216b2ff..9cc48794a962 100644 --- a/python/dgl/traversal.py +++ b/python/dgl/traversal.py @@ -9,7 +9,7 @@ 'topological_nodes_generator', 'dfs_edges_generator', 'dfs_labeled_edges_generator',] -def bfs_nodes_generator(graph, source, reversed=False): +def bfs_nodes_generator(graph, source, reverse=False): """Node frontiers generator using breadth-first search. Parameters @@ -18,7 +18,7 @@ def bfs_nodes_generator(graph, source, reversed=False): The graph object. source : list, tensor of nodes Source nodes. - reversed : bool, default False + reverse : bool, default False If True, traverse following the in-edge direction. 
Returns @@ -41,14 +41,14 @@ def bfs_nodes_generator(graph, source, reversed=False): """ ghandle = graph._graph._handle source = utils.toindex(source) - ret = _CAPI_DGLBFSNodes(ghandle, source.todgltensor(), reversed) + ret = _CAPI_DGLBFSNodes(ghandle, source.todgltensor(), reverse) all_nodes = utils.toindex(ret(0)).tousertensor() # TODO(minjie): how to support directly creating python list sections = utils.toindex(ret(1)).tonumpy().tolist() node_frontiers = F.split(all_nodes, sections, dim=0) return node_frontiers -def bfs_edges_generator(graph, source, reversed=False): +def bfs_edges_generator(graph, source, reverse=False): """Edges frontiers generator using breadth-first search. Parameters @@ -57,7 +57,7 @@ def bfs_edges_generator(graph, source, reversed=False): The graph object. source : list, tensor of nodes Source nodes. - reversed : bool, default False + reverse : bool, default False If True, traverse following the in-edge direction. Returns @@ -81,21 +81,21 @@ def bfs_edges_generator(graph, source, reversed=False): """ ghandle = graph._graph._handle source = utils.toindex(source) - ret = _CAPI_DGLBFSEdges(ghandle, source.todgltensor(), reversed) + ret = _CAPI_DGLBFSEdges(ghandle, source.todgltensor(), reverse) all_edges = utils.toindex(ret(0)).tousertensor() # TODO(minjie): how to support directly creating python list sections = utils.toindex(ret(1)).tonumpy().tolist() edge_frontiers = F.split(all_edges, sections, dim=0) return edge_frontiers -def topological_nodes_generator(graph, reversed=False): +def topological_nodes_generator(graph, reverse=False): """Node frontiers generator using topological traversal. Parameters ---------- graph : DGLGraph The graph object. - reversed : bool, optional + reverse : bool, optional If True, traverse following the in-edge direction. Returns @@ -117,13 +117,13 @@ def topological_nodes_generator(graph, reversed=False): [tensor([0]), tensor([1]), tensor([2]), tensor([3, 4]), tensor([5])] """ ghandle = graph._graph._handle - ret = _CAPI_DGLTopologicalNodes(ghandle, reversed) + ret = _CAPI_DGLTopologicalNodes(ghandle, reverse) all_nodes = utils.toindex(ret(0)).tousertensor() # TODO(minjie): how to support directly creating python list sections = utils.toindex(ret(1)).tonumpy().tolist() return F.split(all_nodes, sections, dim=0) -def dfs_edges_generator(graph, source, reversed=False): +def dfs_edges_generator(graph, source, reverse=False): """Edge frontiers generator using depth-first-search (DFS). Multiple source nodes can be specified to start the DFS traversal. One @@ -137,7 +137,7 @@ def dfs_edges_generator(graph, source, reversed=False): The graph object. source : list, tensor of nodes Source nodes. - reversed : bool, optional + reverse : bool, optional If True, traverse following the in-edge direction. Returns @@ -162,7 +162,7 @@ def dfs_edges_generator(graph, source, reversed=False): """ ghandle = graph._graph._handle source = utils.toindex(source) - ret = _CAPI_DGLDFSEdges(ghandle, source.todgltensor(), reversed) + ret = _CAPI_DGLDFSEdges(ghandle, source.todgltensor(), reverse) all_edges = utils.toindex(ret(0)).tousertensor() # TODO(minjie): how to support directly creating python list sections = utils.toindex(ret(1)).tonumpy().tolist() @@ -171,7 +171,7 @@ def dfs_edges_generator(graph, source, reversed=False): def dfs_labeled_edges_generator( graph, source, - reversed=False, + reverse=False, has_reverse_edge=False, has_nontree_edge=False, return_labels=True): @@ -199,7 +199,7 @@ def dfs_labeled_edges_generator( The graph object. 
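These traversal hunks rename the keyword argument from reversed (which shadowed the Python builtin) to reverse. A short usage sketch of the renamed argument, assuming the DGLGraph construction API used elsewhere in this codebase; the exact tensor type printed depends on the active backend:

import dgl

# small chain 0 -> 1 -> 2 -> 3
g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges([0, 1, 2], [1, 2, 3])

# frontiers expand along out-edges; pass reverse=True to follow in-edges instead
for frontier in dgl.bfs_nodes_generator(g, source=[0], reverse=False):
    print(frontier)   # tensor([0]), tensor([1]), tensor([2]), tensor([3])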
source : list, tensor of nodes Source nodes. - reversed : bool, optional + reverse : bool, optional If true, traverse following the in-edge direction. has_reverse_edge : bool, optional True to include reverse edges. @@ -234,12 +234,12 @@ def dfs_labeled_edges_generator( ghandle = graph._graph._handle source = utils.toindex(source) ret = _CAPI_DGLDFSLabeledEdges( - ghandle, - source.todgltensor(), - reversed, - has_reverse_edge, - has_nontree_edge, - return_labels) + ghandle, + source.todgltensor(), + reverse, + has_reverse_edge, + has_nontree_edge, + return_labels) all_edges = utils.toindex(ret(0)).tousertensor() # TODO(minjie): how to support directly creating python list if return_labels: diff --git a/python/dgl/udf.py b/python/dgl/udf.py index 7187a308d282..4a895140f644 100644 --- a/python/dgl/udf.py +++ b/python/dgl/udf.py @@ -1,7 +1,7 @@ """User-defined function related data structures.""" from __future__ import absolute_import -from .base import ALL, is_all +from .base import is_all from . import backend as F from . import utils diff --git a/python/dgl/utils.py b/python/dgl/utils.py index b429d1adfb18..c8afc2a87cd4 100644 --- a/python/dgl/utils.py +++ b/python/dgl/utils.py @@ -1,7 +1,7 @@ """Utility module.""" from __future__ import absolute_import, division -from collections import Mapping, Iterable +from collections.abc import Mapping, Iterable from functools import wraps import numpy as np @@ -43,7 +43,7 @@ def __getitem__(self, i): def _dispatch(self, data): """Store data based on its type.""" if F.is_tensor(data): - if not (F.dtype(data) == F.int64): + if F.dtype(data) != F.int64: raise DGLError('Index data must be an int64 vector, but got: %s' % str(data)) if len(F.shape(data)) > 1: raise DGLError('Index data must be 1D int64 vector, but got: %s' % str(data)) @@ -63,19 +63,17 @@ def _dispatch(self, data): self._slice_data = slice(data.start, data.stop) else: try: - self._pydata = np.array([int(data)]).astype(np.int64) - except: - try: - data = np.array(data).astype(np.int64) - if data.ndim != 1: - raise DGLError('Index data must be 1D int64 vector,' - ' but got: %s' % str(data)) - self._pydata = data - except: - raise DGLError('Error index data: %s' % str(data)) + data = np.array(data).astype(np.int64) + except Exception: # pylint: disable=broad-except + raise DGLError('Error index data: %s' % str(data)) + if data.ndim == 0: # scalar array + data = np.expand_dims(data, 0) + elif data.ndim != 1: + raise DGLError('Index data must be 1D int64 vector,' + ' but got: %s' % str(data)) + self._pydata = data self._user_tensor_data[F.cpu()] = F.zerocopy_from_numpy(self._pydata) - def tonumpy(self): """Convert to a numpy ndarray.""" if self._pydata is None: @@ -96,8 +94,8 @@ def tousertensor(self, ctx=None): if len(self._user_tensor_data) == 0: if self._dgl_tensor_data is not None: # zero copy from dgl tensor - dl = self._dgl_tensor_data.to_dlpack() - self._user_tensor_data[F.cpu()] = F.zerocopy_from_dlpack(dl) + dlpack = self._dgl_tensor_data.to_dlpack() + self._user_tensor_data[F.cpu()] = F.zerocopy_from_dlpack(dlpack) else: # zero copy from numpy array self._user_tensor_data[F.cpu()] = F.zerocopy_from_numpy(self.tonumpy()) @@ -112,10 +110,17 @@ def todgltensor(self): if self._dgl_tensor_data is None: # zero copy from user tensor tsor = self.tousertensor() - dl = F.zerocopy_to_dlpack(tsor) - self._dgl_tensor_data = nd.from_dlpack(dl) + dlpack = F.zerocopy_to_dlpack(tsor) + self._dgl_tensor_data = nd.from_dlpack(dlpack) return self._dgl_tensor_data + def slice_data(self): + """Return the 
internal slice data. + + If this index is not initialized from slice, the return will be None. + """ + return self._slice_data + def is_slice(self, start, stop): """Check if Index wraps a slice data with given start and stop""" return self._slice_data == slice(start, stop) @@ -136,20 +141,26 @@ def get_items(self, index): Returns ------- utils.Index - + The values at the given position. """ - if index._slice_data is None: + if self._slice_data is not None and self._slice_data.start == 0: + # short-cut for identical mapping + # NOTE: we don't check for out-of-bound error + return index + elif index._slice_data is None: + # the provided index is not a slice tensor = self.tousertensor() index = index.tousertensor() return Index(F.gather_row(tensor, index)) elif self._slice_data is None: + # the current index is not a slice but the provided is a slice tensor = self.tousertensor() index = index._slice_data return Index(F.narrow_row(tensor, index.start, index.stop)) else: # both self and index wrap a slice object, then return another # Index wrapping a slice - start = self._slicedata.start + start = self._slice_data.start index = index._slice_data return Index(slice(start + index.start, start + index.stop)) @@ -168,7 +179,7 @@ def set_items(self, index, value): Returns ------- utils.Index - + The new values. """ tensor = self.tousertensor() index = index.tousertensor() @@ -207,8 +218,24 @@ def has_nonzero(self): tensor = self.tousertensor() return F.sum(tensor, 0) > 0 -def toindex(x): - return x if isinstance(x, Index) else Index(x) +def toindex(data): + """Convert the given data to Index object. + + Parameters + ---------- + data : index data + Data to create the index. + + Returns + ------- + Index + The index object. + + See Also + -------- + Index + """ + return data if isinstance(data, Index) else Index(data) def zero_index(size): """Create a index with provided size initialized to zero @@ -244,21 +271,22 @@ def keys(self): class HybridDict(Mapping): """A readonly dictonary that merges several dict-like (python dict, LazyDict). - If there are duplicate keys, early keys have priority over latter ones + + If there are duplicate keys, early keys have priority over latter ones. """ def __init__(self, *dict_like_list): self._dict_like_list = dict_like_list self._keys = set() - for d in dict_like_list: - self._keys.update(d.keys()) + for obj in dict_like_list: + self._keys.update(obj.keys()) def keys(self): return self._keys def __getitem__(self, key): - for d in self._dict_like_list: - if key in d: - return d[key] + for obj in self._dict_like_list: + if key in obj: + return obj[key] raise KeyError(key) def __contains__(self, key): @@ -290,7 +318,7 @@ def __iter__(self): def __len__(self): return len(self._dict_like) -def build_relabel_map(x, sorted=False): +def build_relabel_map(x, is_sorted=False): """Relabel the input ids to continuous ids that starts from zero. Ids are assigned new ids according to their ascending order. @@ -310,7 +338,7 @@ def build_relabel_map(x, sorted=False): ---------- x : Index The input ids. - sorted : bool, default=False + is_sorted : bool, default=False Whether the input has already been unique and sorted. 
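build_relabel_map assigns consecutive new ids to the unique input ids in ascending order, and the old_to_new return satisfies new_id = old_to_new[old_id]. A hypothetical standalone NumPy sketch of that mapping (not the DGL implementation, which stays in the backend tensor API):

import numpy as np

def relabel(old_ids):
    # hypothetical re-implementation of the mapping described above
    new_to_old = np.unique(old_ids)                      # sorted unique old ids
    old_to_new = np.zeros(int(new_to_old.max()) + 1, dtype=np.int64)
    old_to_new[new_to_old] = np.arange(len(new_to_old))  # new_id = old_to_new[old_id]
    return new_to_old, old_to_new

new_to_old, old_to_new = relabel(np.array([8, 3, 5, 3]))
# new_to_old -> [3 5 8]; old_to_new[3] -> 0, old_to_new[5] -> 1, old_to_new[8] -> 2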
Returns @@ -323,7 +351,7 @@ def build_relabel_map(x, sorted=False): new id tensor: new_id = old_to_new[old_id] """ x = x.tousertensor() - if not sorted: + if not is_sorted: unique_x, _ = F.sort_1d(F.unique(x)) else: unique_x = x @@ -397,6 +425,7 @@ def wrapper(self, *args): return _creator def is_dict_like(obj): + """Return true if the object can be treated as a dictionary.""" return isinstance(obj, Mapping) def reorder(dict_like, index): diff --git a/python/dgl/view.py b/python/dgl/view.py index 7dbeb47b12f3..41f832f4a6d2 100644 --- a/python/dgl/view.py +++ b/python/dgl/view.py @@ -1,11 +1,11 @@ """Views of DGLGraph.""" from __future__ import absolute_import -from collections import MutableMapping, namedtuple +from collections import namedtuple +from collections.abc import MutableMapping from .base import ALL, is_all, DGLError from . import backend as F -from . import utils NodeSpace = namedtuple('NodeSpace', ['data']) @@ -41,6 +41,12 @@ def __call__(self): return F.arange(0, len(self)) class NodeDataView(MutableMapping): + """The data view class when G.nodes[...].data is called. + + See Also + -------- + dgl.DGLGraph.nodes + """ __slots__ = ['_graph', '_nodes'] def __init__(self, graph, nodes): @@ -103,6 +109,12 @@ def __call__(self, *args, **kwargs): return self._graph.all_edges(*args, **kwargs) class EdgeDataView(MutableMapping): + """The data view class when G.edges[...].data is called. + + See Also + -------- + dgl.DGLGraph.edges + """ __slots__ = ['_graph', '_edges'] def __init__(self, graph, edges): diff --git a/tests/graph_index/test_basics.py b/tests/graph_index/test_basics.py index 82416b002601..5794f191329c 100644 --- a/tests/graph_index/test_basics.py +++ b/tests/graph_index/test_basics.py @@ -145,10 +145,11 @@ def test_create_from_elist(): for i, (u, v) in enumerate(elist): assert g.edge_id(u, v)[0] == i # immutable graph - g = create_graph_index(elist, readonly=True) - for i, (u, v) in enumerate(elist): - print(u, v, g.edge_id(u, v)[0]) - assert g.edge_id(u, v)[0] == i + # TODO: disabled due to torch support + #g = create_graph_index(elist, readonly=True) + #for i, (u, v) in enumerate(elist): + # print(u, v, g.edge_id(u, v)[0]) + # assert g.edge_id(u, v)[0] == i if __name__ == '__main__': test_edge_id() diff --git a/tests/lint/pylintrc b/tests/lint/pylintrc new file mode 100644 index 000000000000..e8343b440d82 --- /dev/null +++ b/tests/lint/pylintrc @@ -0,0 +1,499 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS,_cy2,_cy3,backend,data,nn,contrib + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=4 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. 
+load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Specify a configuration file. +#rcfile= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=design, + similarities, + no-self-use, + attribute-defined-outside-init, + locally-disabled, + star-args, + pointless-except, + bad-option-value, + global-statement, + fixme, + suppressed-message, + useless-suppression, + locally-enabled, + import-error, + unsubscriptable-object, + unbalanced-tuple-unpacking, + protected-access, + useless-object-inheritance, + no-else-return, + len-as-condition, + cyclic-import, # disabled due to the inevitable dgl.graph -> dgl.subgraph loop + undefined-variable, # disabled due to C extension (should enable) + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[REPORTS] + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. 
+never-returning-functions=sys.exit + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. +#class-attribute-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i,j,k,u,v,e,n,m,w,x,y,g,fn,ex,Run,_ + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. +#variable-rgx= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. 
+callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package.. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[LOGGING] + +# Format style used to check logging format string. `old` means using % +# formatting, while `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=4000 + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. +no-space-check=trailing-comma, + dict-separator + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. 
A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules=dgl.backend,dgl._api_internal + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + + +[IMPORTS] + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=yes + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled). +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + + +[DESIGN] + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement. +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. 
+max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception". +overgeneral-exceptions=Exception diff --git a/tests/mxnet/test_graph_index.py b/tests/mxnet/test_graph_index.py index 3a578572dfb8..d92a7fed6543 100644 --- a/tests/mxnet/test_graph_index.py +++ b/tests/mxnet/test_graph_index.py @@ -4,8 +4,7 @@ import numpy as np import scipy as sp import dgl -from dgl.graph import GraphIndex, create_graph_index -from dgl.graph_index import map_to_subgraph_nid +from dgl.graph_index import map_to_subgraph_nid, GraphIndex, create_graph_index from dgl import utils def generate_rand_graph(n): diff --git a/tests/pytorch/test_basics.py b/tests/pytorch/test_basics.py index a2451416ea8a..a171e0dca774 100644 --- a/tests/pytorch/test_basics.py +++ b/tests/pytorch/test_basics.py @@ -596,11 +596,12 @@ def test_repr(): G.add_nodes(10) G.add_edge(0, 1) repr_string = G.__repr__() + print(repr_string) G.ndata['x'] = th.zeros((10, 5)) G.add_edges([0, 1], 2) G.edata['y'] = th.zeros((3, 4)) repr_string = G.__repr__() - + print(repr_string) if __name__ == '__main__': test_nx_conversion() diff --git a/tests/pytorch/test_frame.py b/tests/pytorch/test_frame.py index e8f27ca6b347..958071e4c36c 100644 --- a/tests/pytorch/test_frame.py +++ b/tests/pytorch/test_frame.py @@ -61,7 +61,7 @@ def failed_add_col(): def test_column2(): # Test frameref column getter/setter data = Frame(create_test_data()) - f = FrameRef(data, [3, 4, 5, 6, 7]) + f = FrameRef(data, toindex([3, 4, 5, 6, 7])) assert f.num_rows == 5 assert len(f) == 3 assert U.allclose(f['a1'], data['a1'].data[3:8]) @@ -111,7 +111,7 @@ def test_append2(): assert not f.is_span_whole_column() assert f.num_rows == 3 * N new_idx = list(range(N)) + list(range(2*N, 4*N)) - assert th.all(f.index().tousertensor() == th.tensor(new_idx, dtype=th.int64)) + assert th.all(f._index.tousertensor() == th.tensor(new_idx, dtype=th.int64)) assert data.num_rows == 4 * N def test_append3(): @@ -233,8 +233,8 @@ def test_row4(): def test_sharing(): data = Frame(create_test_data()) - f1 = FrameRef(data, index=[0, 1, 2, 3]) - f2 = FrameRef(data, index=[2, 3, 4, 5, 6]) + f1 = FrameRef(data, index=toindex([0, 1, 2, 3])) + f2 = FrameRef(data, index=toindex([2, 3, 4, 5, 6])) # test read for k, v in f1.items(): assert U.allclose(data[k].data[0:4], v) @@ -260,8 +260,8 @@ def test_sharing(): def test_slicing(): data = Frame(create_test_data(grad=True)) - f1 = FrameRef(data, index=slice(1, 5)) - f2 = FrameRef(data, index=slice(3, 8)) + f1 = FrameRef(data, index=toindex(slice(1, 5))) + f2 = FrameRef(data, index=toindex(slice(3, 8))) # test read for k, v in f1.items(): assert U.allclose(data[k].data[1:5], v) @@ -279,15 +279,15 @@ def test_slicing(): 'a2': th.ones([2, D]), 'a3': th.ones([2, D]), } - f2_a1[0:2] = 1 + f2_a1[toindex(slice(0,2))] = 1 assert U.allclose(f2['a1'], f2_a1) - f1[2:4] = { + f1[toindex(slice(2,4))] = { 'a1': th.zeros([2, 
D]), 'a2': th.zeros([2, D]), 'a3': th.zeros([2, D]), } - f2_a1[0:2] = 0 + f2_a1[toindex(slice(0,2))] = 0 assert U.allclose(f2['a1'], f2_a1) def test_add_rows(): @@ -299,12 +299,48 @@ def test_add_rows(): ans = th.cat([x, th.zeros(3, 4)]) assert U.allclose(f1['x'], ans) f1.add_rows(4) - f1[4:8] = {'x': th.ones(4, 4), 'y': th.ones(4, 5)} + f1[toindex(slice(4,8))] = {'x': th.ones(4, 4), 'y': th.ones(4, 5)} ans = th.cat([ans, th.ones(4, 4)]) assert U.allclose(f1['x'], ans) ans = th.cat([th.zeros(4, 5), th.ones(4, 5)]) assert U.allclose(f1['y'], ans) +def test_inplace(): + f = FrameRef(Frame(create_test_data())) + print(f.schemes) + a1addr = f['a1'].data.data_ptr() + a2addr = f['a2'].data.data_ptr() + a3addr = f['a3'].data.data_ptr() + + # column updates are always out-of-place + f['a1'] = th.ones((N, D)) + newa1addr = f['a1'].data.data_ptr() + assert a1addr != newa1addr + a1addr = newa1addr + # full row update that becomes column update + f[toindex(slice(0, N))] = {'a1' : th.ones((N, D))} + assert f['a1'].data.data_ptr() != a1addr + + # row update (outplace) w/ slice + f[toindex(slice(1, 4))] = {'a2' : th.ones((3, D))} + newa2addr = f['a2'].data.data_ptr() + assert a2addr != newa2addr + a2addr = newa2addr + # row update (outplace) w/ list + f[toindex([1, 3, 5])] = {'a2' : th.ones((3, D))} + newa2addr = f['a2'].data.data_ptr() + assert a2addr != newa2addr + a2addr = newa2addr + + # row update (inplace) w/ slice + f.update_data(toindex(slice(1, 4)), {'a2' : th.ones((3, D))}, True) + newa2addr = f['a2'].data.data_ptr() + assert a2addr == newa2addr + # row update (inplace) w/ list + f.update_data(toindex([1, 3, 5]), {'a2' : th.ones((3, D))}, True) + newa2addr = f['a2'].data.data_ptr() + assert a2addr == newa2addr + if __name__ == '__main__': test_create() test_column1() @@ -319,3 +355,4 @@ def test_add_rows(): test_sharing() test_slicing() test_add_rows() + test_inplace() diff --git a/tests/pytorch/test_graph.py b/tests/pytorch/test_graph.py index 302e6bec5b69..1d59fb0678c2 100644 --- a/tests/pytorch/test_graph.py +++ b/tests/pytorch/test_graph.py @@ -33,9 +33,10 @@ def test_create_from_elist(): for i, (u, v) in enumerate(elist): assert g.edge_id(u, v) == i # immutable graph - g = dgl.DGLGraph(elist, readonly=True) - for i, (u, v) in enumerate(elist): - assert g.edge_id(u, v) == i + # XXX: not enabled for pytorch + #g = dgl.DGLGraph(elist, readonly=True) + #for i, (u, v) in enumerate(elist): + # assert g.edge_id(u, v) == i def test_adjmat_cache(): n = 1000 @@ -109,7 +110,7 @@ def test_incmat_cache(): assert dur2 < dur1 assert id(inc1) == id(inc2) # different arg should result in different cache - inc3 = g.incidence_matrix(type="both") + inc3 = g.incidence_matrix("both") assert id(inc3) != id(inc2) # manually clear the cache g.clear_cache() diff --git a/tests/pytorch/test_pickle.py b/tests/pytorch/test_pickle.py index 8196b9ee9ccb..3f570f05abee 100644 --- a/tests/pytorch/test_pickle.py +++ b/tests/pytorch/test_pickle.py @@ -112,7 +112,7 @@ def test_pickling_graph(): assert new_g._message_func == _global_message_func assert isinstance(new_g._reduce_func, type(reduce_func)) assert new_g._reduce_func._name == 'sum' - assert new_g._reduce_func.op == backend.sum + assert new_g._reduce_func.reduce_op == backend.sum assert new_g._reduce_func.msg_field == 'x' assert new_g._reduce_func.out_field == 'x' diff --git a/tests/scripts/task_lint.sh b/tests/scripts/task_lint.sh index 50b1cc63088f..f7634a1bfa5c 100644 --- a/tests/scripts/task_lint.sh +++ b/tests/scripts/task_lint.sh @@ -3,3 +3,7 @@ # cpplint echo 
'Checking code style of C++ codes...' python3 third_party/dmlc-core/scripts/lint.py dgl cpp include src + +# pylint +echo 'Checking code style of python codes...' +python3 -m pylint --reports=y -v --rcfile=tests/lint/pylintrc python/dgl
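The same pylint pass can be reproduced outside CI. A small helper that shells out to the exact command task_lint.sh now runs, assuming it is invoked from the repository root with pylint installed:

import subprocess

# mirror the lint step added to tests/scripts/task_lint.sh
subprocess.check_call([
    'python3', '-m', 'pylint',
    '--reports=y', '-v',
    '--rcfile=tests/lint/pylintrc',
    'python/dgl',
])

Because the local run uses the same rcfile, the per-line suppressions added in this diff (for example the broad-except disable in utils.py) behave identically in CI and on a developer machine.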