diff --git a/example/imagenet/alexnet.ipynb b/example/imagenet/alexnet.ipynb
new file mode 100644
index 000000000000..1e9e399d1b5f
--- /dev/null
+++ b/example/imagenet/alexnet.ipynb
@@ -0,0 +1,445 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Basic AlexNet Example\n",
+ "--------\n",
+ "\n",
+ "This notebook shows how to use MXNet construct AlexNet. AlexNet is made by Alex Krizhevsky in 2012.\n",
+ "\n",
+ "We will show how to train AlexNet in Python with single/multi GPU. All you need is to write a piece of Python code to describe network, then MXNet will help you finish all work without any of your effort. \n",
+ "\n",
+ "Generally, we need \n",
+ "\n",
+ "- Declare symbol network\n",
+ "- Declare data iterator\n",
+ "- Bind symbol network to device to model\n",
+ "- Fit the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "import mxnet as mx"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we have successully load MXNet. we will start declare a symbolic network. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "input_data = mx.symbol.Variable(name=\"data\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We use a special symbol ```Variable``` to represent input data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [],
+ "source": [
+ "# stage 1\n",
+ "conv1 = mx.symbol.Convolution(data=input_data, kernel=(11, 11), stride=(4, 4), num_filter=96)\n",
+ "relu1 = mx.symbol.Activation(data=conv1, act_type=\"relu\")\n",
+ "pool1 = mx.symbol.Pooling(data=relu1, pool_type=\"max\", kernel=(3, 3), stride=(2,2))\n",
+ "lrn1 = mx.symbol.LRN(data=pool1, alpha=0.0001, beta=0.75, knorm=1, nsize=5)\n",
+ "# stage 2\n",
+ "conv2 = mx.symbol.Convolution(data=lrn1, kernel=(5, 5), pad=(2, 2), num_filter=256)\n",
+ "relu2 = mx.symbol.Activation(data=conv2, act_type=\"relu\")\n",
+ "pool2 = mx.symbol.Pooling(data=relu2, kernel=(3, 3), stride=(2, 2))\n",
+ "lrn2 = mx.symbol.LRN(data=pool2, alpha=0.0001, beta=0.75, knorm=1, nsize=5)\n",
+ "# stage 3\n",
+ "conv3 = mx.symbol.Convolution(data=lrn2, kernel=(3, 3), pad=(1, 1), num_filter=384)\n",
+ "relu3 = mx.symbol.Activation(data=conv3, act_type=\"relu\")\n",
+ "conv4 = mx.symbol.Convolution(data=relu3, kernel=(3, 3), pad=(1, 1), num_filter=384)\n",
+ "relu4 = mx.symbol.Activation(data=conv4, act_type=\"relu\")\n",
+ "conv5 = mx.symbol.Convolution(data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256)\n",
+ "relu5 = mx.symbol.Activation(data=conv5, act_type=\"relu\")\n",
+ "pool3 = mx.symbol.Pooling(data=relu5, kernel=(3, 3), stride=(2, 2))\n",
+ "# stage 4\n",
+ "flatten = mx.symbol.Flatten(data=pool3)\n",
+ "fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=4096)\n",
+ "relu6 = mx.symbol.Activation(data=fc1, act_type=\"relu\")\n",
+ "dropout1 = mx.symbol.Dropout(data=relu6, p=0.5)\n",
+ "# stage 5\n",
+ "fc2 = mx.symbol.FullyConnected(data=dropout1, num_hidden=4096)\n",
+ "relu7 = mx.symbol.Activation(data=fc2, act_type=\"relu\")\n",
+ "dropout2 = mx.symbol.Dropout(data=relu7, p=0.5)\n",
+ "# stage 6\n",
+ "fc3 = mx.symbol.FullyConnected(data=dropout2, num_hidden=1000)\n",
+ "softmax = mx.symbol.Softmax(data=fc3)"
+ ]
+ },
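+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before going further, we can sanity-check the symbol we just declared. ```list_arguments``` and ```infer_shape``` are the same calls used internally by ```mxnet.model```; the batch size and input resolution below (128 RGB images of 224x224) are only illustrative:\n",
+ "\n",
+ "```python\n",
+ "print(softmax.list_arguments())  # names of the data/label variables and of every weight and bias\n",
+ "arg_shapes, out_shapes, aux_shapes = softmax.infer_shape(data=(128, 3, 224, 224))\n",
+ "print(out_shapes)  # should be [(128, 1000)]: one 1000-way prediction per image\n",
+ "```"
+ ]
+ },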
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we have a AlexNet. The ```softmax``` symbol contains all network structures. We can visualize our network structure. (require ```graphviz``` package)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mx.visualization.network2dot(\"AlexNet\", softmax)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": true
+ },
+ "source": [
+ "After define our network, we are able to create our model."
+ ]
+ },
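+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The remaining steps, declaring a data iterator, binding the network to a device, and fitting the model, are not filled in here. As a minimal low-level sketch of the binding step, the symbol can be bound with ```simple_bind```, which is also what ```mxnet.model``` uses internally; ```mx.cpu()``` and the shapes below are illustrative assumptions:\n",
+ "\n",
+ "```python\n",
+ "# illustrative sketch only: a real run would use a GPU context and a data iterator\n",
+ "executor = softmax.simple_bind(mx.cpu(), data=(128, 3, 224, 224))\n",
+ "print(len(executor.arg_arrays))  # one NDArray allocated per argument of the symbol\n",
+ "```"
+ ]
+ },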
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.4.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py
index e3b17baa1b31..b5429a7bd816 100644
--- a/python/mxnet/__init__.py
+++ b/python/mxnet/__init__.py
@@ -21,6 +21,7 @@
from . import optimizer
from . import model
from . import initializer
+from . import visualization
import atexit
__version__ = "0.1.0"
diff --git a/python/mxnet/metric.py b/python/mxnet/metric.py
index f58e90f4ac52..ff100c12c191 100644
--- a/python/mxnet/metric.py
+++ b/python/mxnet/metric.py
@@ -1,6 +1,6 @@
+# pylint: disable=invalid-name
"""Online evaluation metric module."""
import numpy as np
-from .ndarray import NDArray
class EvalMetric(object):
"""Base class of all evaluation metrics."""
@@ -8,7 +8,7 @@ def __init__(self, name):
self.name = name
self.reset()
- def update(pred, label):
+ def update(self, pred, label):
"""Update the internal evaluation.
Parameters
@@ -40,6 +40,7 @@ def get(self):
class Accuracy(EvalMetric):
+ """Calculate accuracy"""
def __init__(self):
super(Accuracy, self).__init__('accuracy')
diff --git a/python/mxnet/model.py b/python/mxnet/model.py
index f1cda62a1e53..726be0d7eb45 100644
--- a/python/mxnet/model.py
+++ b/python/mxnet/model.py
@@ -1,11 +1,12 @@
-# pylint: skip-file
+# pylint: disable=fixme, invalid-name, too-many-arguments, too-many-locals, no-member
+# pylint: disable=too-many-branches, too-many-statements, unused-argument, unused-variable
+"""MXNet model module"""
import numpy as np
import time
from . import io
from . import nd
from . import optimizer as opt
from . import metric
-from .symbol import Symbol
from .context import Context
from .initializer import Xavier
@@ -20,7 +21,7 @@
def _train(symbol, ctx, input_shape,
- arg_params, aux_states,
+ arg_params, aux_params,
begin_round, end_round, optimizer,
train_data, eval_data=None, eval_metric=None,
iter_end_callback=None, verbose=True):
@@ -40,7 +41,7 @@ def _train(symbol, ctx, input_shape,
arg_params : dict of str to NDArray
Model parameter, dict of name to NDArray of net's weights.
- aux_states : dict of str to NDArray
+ aux_params : dict of str to NDArray
Model parameter, dict of name to NDArray of net's auxiliary states.
begin_round : int
@@ -81,16 +82,16 @@ def _train(symbol, ctx, input_shape,
grad_arrays = train_exec.grad_arrays
aux_arrays = train_exec.aux_arrays
# copy initialized parameters to executor parameters
- for key, weight in zip(arg_names, arg_arrays):
+ for key, weight in list(zip(arg_names, arg_arrays)):
if key in arg_params:
arg_params[key].copyto(weight)
- for key, weight in zip(aux_names, aux_arrays):
+ for key, weight in list(zip(aux_names, aux_arrays)):
if key in aux_params:
aux_params[key].copyto(weight)
# setup helper data structures
label_array = None
data_array = None
- for name, arr in zip(symbol.list_arguments(), arg_arrays):
+ for name, arr in list(zip(symbol.list_arguments(), arg_arrays)):
if name.endswith('label'):
assert label_array is None
label_array = arr
@@ -151,10 +152,10 @@ def _train(symbol, ctx, input_shape,
for key, weight, gard in arg_blocks:
if key in arg_params:
weight.copyto(arg_params[key])
- for key, arr in zip(aux_names, aux_states):
- arr.copyto(aux_states[key])
+ for key, arr in list(zip(aux_names, aux_arrays)):
+ arr.copyto(aux_params[key])
if iter_end_callback:
- iter_end_callback(i, arg_params, aux_states)
+ iter_end_callback(i, arg_params, aux_arrays)
# end of the function
return
@@ -224,11 +225,11 @@ def _init_params(self):
arg_shapes, _, aux_shapes = self.symbol.infer_shape(data=self.input_shape)
if self.arg_params is None:
arg_names = self.symbol.list_arguments()
- self.arg_params = {k : nd.zeros(s) for k, s in zip(arg_names, arg_shapes)
+ self.arg_params = {k : nd.zeros(s) for k, s in list(zip(arg_names, arg_shapes))
if not is_data_arg(k)}
if self.aux_states is None:
aux_names = self.symbol.list_auxiliary_states()
- self.aux_states = {k : nd.zeros(s) for k, s in zip(aux_names, aux_shapes)}
+ self.aux_states = {k : nd.zeros(s) for k, s in list(zip(aux_names, aux_shapes))}
for k, v in self.arg_params.items():
self.initializer(k, v)
for k, v in self.aux_states.items():
@@ -241,7 +242,7 @@ def _init_predictor(self):
# for now only use the first device
pred_exec = self.symbol.simple_bind(
self.ctx[0], grad_req='null', data=self.input_shape)
- for name, value in zip(self.symbol.list_arguments(), pred_exec.arg_arrays):
+ for name, value in list(zip(self.symbol.list_arguments(), pred_exec.arg_arrays)):
if name not in self.arg_datas:
assert name in self.arg_params
self.arg_params[name].copyto(value)
diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py
index 682071148c7e..8118e23f2bf6 100644
--- a/python/mxnet/optimizer.py
+++ b/python/mxnet/optimizer.py
@@ -1,4 +1,4 @@
-# pylint: skip-file
+# pylint: disable=fixme, invalid-name
"""Common Optimization algorithms with regularizations."""
from .ndarray import NDArray, zeros
diff --git a/python/mxnet/symbol.py b/python/mxnet/symbol.py
index 78d6d9187c45..006bc66a5223 100644
--- a/python/mxnet/symbol.py
+++ b/python/mxnet/symbol.py
@@ -423,7 +423,6 @@ def simple_bind(self, ctx, grad_req='write', **kwargs):
arg_ndarrays = [zeros(shape, ctx) for shape in arg_shapes]
if grad_req != 'null':
- req = {}
grad_ndarrays = {}
for name, shape in zip(self.list_arguments(), arg_shapes):
if not (name.endswith('data') or name.endswith('label')):
diff --git a/python/mxnet/visualization.py b/python/mxnet/visualization.py
new file mode 100644
index 000000000000..bccc4a2b3155
--- /dev/null
+++ b/python/mxnet/visualization.py
@@ -0,0 +1,137 @@
+# coding: utf-8
+# pylint: disable=invalid-name, protected-access, too-many-locals, fixme
+# pylint: disable=unused-argument, too-many-branches, too-many-statements
+"""Visualization module"""
+from .symbol import Symbol
+import json
+import re
+import copy
+
+
+def _str2tuple(string):
+ """convert shape string to list, internal use only
+
+ Parameters
+ ----------
+ string: str
+ shape string
+
+ Returns
+ -------
+ list of str to represent shape
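+
+ Examples
+ --------
+ >>> _str2tuple("(3, 3)")
+ ['3', '3']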
+ """
+ return re.findall(r"\d+", string)
+
+
+def network2dot(title, symbol, shape=None):
+ """convert symbol to dot object for visualization
+
+ Parameters
+ ----------
+ title: str
+ title of the dot graph
+ symbol: Symbol
+ symbol to be visualized
+ shape: TODO
+ TODO
+
+ Returns
+ ------
+ dot: Digraph
+ dot object of symbol
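+
+ Examples
+ --------
+ >>> dot = network2dot("AlexNet", softmax)  # softmax is the output Symbol of a network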
+ """
+ # todo add shape support
+ try:
+ from graphviz import Digraph
+ except ImportError:
+ raise ImportError("Drawing the network requires the graphviz library")
+ if not isinstance(symbol, Symbol):
+ raise TypeError("symbol must be Symbol")
+ conf = json.loads(symbol.tojson())
+ nodes = conf["nodes"]
+ heads = set(conf["heads"][0]) # TODO(xxx): check carefully
+ node_attr = {"shape": "box", "fixedsize": "true",
+ "width": "1.3", "height": "0.8034", "style": "filled"}
+ dot = Digraph(name=title)
+ # make nodes
+ for i in range(len(nodes)):
+ node = nodes[i]
+ op = node["op"]
+ name = "%s_%d" % (op, i)
+ # input data
+ if i in heads and op == "null":
+ label = node["name"]
+ attr = copy.deepcopy(node_attr)
+ dot.node(name=name, label=label, **attr)
+ if op == "null":
+ continue
+ elif op == "Convolution":
+ label = "Convolution\n%sx%s/%s, %s" % (_str2tuple(node["param"]["kernel"])[0],
+ _str2tuple(node["param"]["kernel"])[1],
+ _str2tuple(node["param"]["stride"])[0],
+ node["param"]["num_filter"])
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "royalblue1"
+ dot.node(name=name, label=label, **attr)
+ elif op == "FullyConnected":
+ label = "FullyConnected\n%s" % node["param"]["num_hidden"]
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "royalblue1"
+ dot.node(name=name, label=label, **attr)
+ elif op == "BatchNorm":
+ label = "BatchNorm"
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "orchid1"
+ dot.node(name=name, label=label, **attr)
+ elif op == "Concat":
+ label = "Concat"
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "seagreen1"
+ dot.node(name=name, label=label, **attr)
+ elif op == "Flatten":
+ label = "Flatten"
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "seagreen1"
+ dot.node(name=name, label=label, **attr)
+ elif op == "Reshape":
+ label = "Reshape"
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "seagreen1"
+ dot.node(name=name, label=label, **attr)
+ elif op == "Pooling":
+ label = "Pooling\n%s, %sx%s/%s" % (node["param"]["pool_type"],
+ _str2tuple(node["param"]["kernel"])[0],
+ _str2tuple(node["param"]["kernel"])[1],
+ _str2tuple(node["param"]["stride"])[0])
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "firebrick2"
+ dot.node(name=name, label=label, **attr)
+ elif op == "Activation" or op == "LeakyReLU":
+ label = "%s\n%s" % (op, node["param"]["act_type"])
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "salmon"
+ dot.node(name=name, label=label, **attr)
+ else:
+ label = op
+ attr = copy.deepcopy(node_attr)
+ attr["color"] = "olivedrab1"
+ dot.node(name=name, label=label, **attr)
+
+ # add edges
+ for i in range(len(nodes)):
+ node = nodes[i]
+ op = node["op"]
+ name = "%s_%d" % (op, i)
+ if op == "null":
+ continue
+ else:
+ inputs = node["inputs"]
+ for item in inputs:
+ input_node = nodes[item[0]]
+ input_name = "%s_%d" % (input_node["op"], item[0])
+ if input_node["op"] != "null" or item[0] in heads:
+ # add shape into label
+ attr = {"dir": "back"}
+ dot.edge(tail_name=name, head_name=input_name, **attr)
+
+ return dot