diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index aebd2047b1e..a866cee16dc 100644 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -56,6 +56,7 @@
  • Language Guides:
  • DML Language Reference
  • Beginner's Guide to DML and PyDML
  • +
  • Beginner's Guide for Python users
  • ML Algorithms:
  • Algorithms Reference
  • diff --git a/docs/algorithms-classification.md b/docs/algorithms-classification.md index f25d78ea459..03c78d6cac8 100644 --- a/docs/algorithms-classification.md +++ b/docs/algorithms-classification.md @@ -129,9 +129,9 @@ Eqs. (1) and (2).
    {% highlight python %} -import SystemML as sml +from SystemML.mllearn import LogisticRegression # C = 1/reg -logistic = sml.mllearn.LogisticRegression(sqlCtx, fit_intercept=True, max_iter=100, max_inner_iter=0, tol=0.000001, C=1.0) +logistic = LogisticRegression(sqlCtx, fit_intercept=True, max_iter=100, max_inner_iter=0, tol=0.000001, C=1.0) # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix y_test = logistic.fit(X_train, y_train).predict(X_test) # df_train is DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features" @@ -229,7 +229,7 @@ SystemML Language Reference for details. {% highlight python %} # Scikit-learn way from sklearn import datasets, neighbors -import SystemML as sml +from SystemML.mllearn import LogisticRegression from pyspark.sql import SQLContext sqlCtx = SQLContext(sc) digits = datasets.load_digits() @@ -240,12 +240,12 @@ X_train = X_digits[:.9 * n_samples] y_train = y_digits[:.9 * n_samples] X_test = X_digits[.9 * n_samples:] y_test = y_digits[.9 * n_samples:] -logistic = sml.mllearn.LogisticRegression(sqlCtx) +logistic = LogisticRegression(sqlCtx) print('LogisticRegression score: %f' % logistic.fit(X_train, y_train).score(X_test, y_test)) # MLPipeline way from pyspark.ml import Pipeline -import SystemML as sml +from SystemML.mllearn import LogisticRegression from pyspark.ml.feature import HashingTF, Tokenizer from pyspark.sql import SQLContext sqlCtx = SQLContext(sc) @@ -265,7 +265,7 @@ training = sqlCtx.createDataFrame([ ], ["id", "text", "label"]) tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20) -lr = sml.mllearn.LogisticRegression(sqlCtx) +lr = LogisticRegression(sqlCtx) pipeline = Pipeline(stages=[tokenizer, hashingTF, lr]) model = pipeline.fit(training) test = sqlCtx.createDataFrame([ @@ -458,9 +458,9 @@ support vector machine (`y` with domain size `2`).
    {% highlight python %} -import SystemML as sml +from SystemML.mllearn import SVM # C = 1/reg -svm = sml.mllearn.SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=False) +svm = SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=False) # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix y_test = svm.fit(X_train, y_train) # df_train is DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features" @@ -714,9 +714,9 @@ class labels.
    {% highlight python %} -import SystemML as sml +from SystemML.mllearn import SVM # C = 1/reg -svm = sml.mllearn.SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=True) +svm = SVM(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=True) # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix y_test = svm.fit(X_train, y_train) # df_train is DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features" @@ -852,7 +852,7 @@ SystemML Language Reference for details. {% highlight python %} # Scikit-learn way from sklearn import datasets, neighbors -import SystemML as sml +from SystemML.mllearn import SVM from pyspark.sql import SQLContext sqlCtx = SQLContext(sc) digits = datasets.load_digits() @@ -863,12 +863,12 @@ X_train = X_digits[:.9 * n_samples] y_train = y_digits[:.9 * n_samples] X_test = X_digits[.9 * n_samples:] y_test = y_digits[.9 * n_samples:] -svm = sml.mllearn.SVM(sqlCtx, is_multi_class=True) +svm = SVM(sqlCtx, is_multi_class=True) print('LogisticRegression score: %f' % svm.fit(X_train, y_train).score(X_test, y_test)) # MLPipeline way from pyspark.ml import Pipeline -import SystemML as sml +from SystemML.mllearn import SVM from pyspark.ml.feature import HashingTF, Tokenizer from pyspark.sql import SQLContext sqlCtx = SQLContext(sc) @@ -888,7 +888,7 @@ training = sqlCtx.createDataFrame([ ], ["id", "text", "label"]) tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20) -svm = sml.mllearn.SVM(sqlCtx, is_multi_class=True) +svm = SVM(sqlCtx, is_multi_class=True) pipeline = Pipeline(stages=[tokenizer, hashingTF, svm]) model = pipeline.fit(training) test = sqlCtx.createDataFrame([ @@ -1026,8 +1026,8 @@ applicable when all features are counts of categorical values.
    {% highlight python %} -import SystemML as sml -nb = sml.mllearn.NaiveBayes(sqlCtx, laplace=1.0) +from SystemML.mllearn import NaiveBayes +nb = NaiveBayes(sqlCtx, laplace=1.0) # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix y_test = nb.fit(X_train, y_train) # df_train is DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features" @@ -1149,7 +1149,7 @@ SystemML Language Reference for details. {% highlight python %} from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import TfidfVectorizer -import SystemML as sml +from SystemML.mllearn import NaiveBayes from sklearn import metrics from pyspark.sql import SQLContext sqlCtx = SQLContext(sc) @@ -1160,7 +1160,7 @@ vectorizer = TfidfVectorizer() # Both vectors and vectors_test are SciPy CSR matrix vectors = vectorizer.fit_transform(newsgroups_train.data) vectors_test = vectorizer.transform(newsgroups_test.data) -nb = sml.mllearn.NaiveBayes(sqlCtx) +nb = NaiveBayes(sqlCtx) nb.fit(vectors, newsgroups_train.target) pred = nb.predict(vectors_test) metrics.f1_score(newsgroups_test.target, pred, average='weighted') diff --git a/docs/algorithms-regression.md b/docs/algorithms-regression.md index 5241f5f1d68..6585b0084d2 100644 --- a/docs/algorithms-regression.md +++ b/docs/algorithms-regression.md @@ -82,9 +82,9 @@ efficient when the number of features $m$ is relatively small
    {% highlight python %} -import SystemML as sml +from SystemML.mllearn import LinearRegression # C = 1/reg -lr = sml.mllearn.LinearRegression(sqlCtx, fit_intercept=True, C=1.0, solver='direct-solve') +lr = LinearRegression(sqlCtx, fit_intercept=True, C=1.0, solver='direct-solve') # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrame or SciPy Sparse Matrix y_test = lr.fit(X_train, y_train) # df_train is DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features" @@ -124,9 +124,9 @@ y_test = lr.fit(df_train)
    {% highlight python %} -import SystemML as sml +from SystemML.mllearn import LinearRegression # C = 1/reg -lr = sml.mllearn.LinearRegression(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, solver='newton-cg') +lr = LinearRegression(sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, solver='newton-cg') # X_train, y_train and X_test can be NumPy matrices or Pandas DataFrames or SciPy Sparse matrices y_test = lr.fit(X_train, y_train) # df_train is DataFrame that contains two columns: "features" (of type Vector) and "label". df_test is a DataFrame that contains the column "features" @@ -222,7 +222,7 @@ SystemML Language Reference for details. {% highlight python %} import numpy as np from sklearn import datasets -import SystemML as sml +from SystemML.mllearn import LinearRegression from pyspark.sql import SQLContext # Load the diabetes dataset diabetes = datasets.load_diabetes() @@ -235,7 +235,7 @@ diabetes_X_test = diabetes_X[-20:] diabetes_y_train = diabetes.target[:-20] diabetes_y_test = diabetes.target[-20:] # Create linear regression object -regr = sml.mllearn.LinearRegression(sqlCtx, solver='direct-solve') +regr = LinearRegression(sqlCtx, solver='direct-solve') # Train the model using the training sets regr.fit(diabetes_X_train, diabetes_y_train) # The mean square error @@ -277,7 +277,7 @@ print("Residual sum of squares: %.2f" % np.mean((regr.predict(diabetes_X_test) - {% highlight python %} import numpy as np from sklearn import datasets -import SystemML as sml +from SystemML.mllearn import LinearRegression from pyspark.sql import SQLContext # Load the diabetes dataset diabetes = datasets.load_diabetes() @@ -290,7 +290,7 @@ diabetes_X_test = diabetes_X[-20:] diabetes_y_train = diabetes.target[:-20] diabetes_y_test = diabetes.target[-20:] # Create linear regression object -regr = sml.mllearn.LinearRegression(sqlCtx, solver='newton-cg') +regr = LinearRegression(sqlCtx, solver='newton-cg') # Train the model using the training sets regr.fit(diabetes_X_train, diabetes_y_train) # The mean square error diff --git a/docs/beginners-guide-python.md b/docs/beginners-guide-python.md new file mode 100644 index 00000000000..790ed43b5d4 --- /dev/null +++ b/docs/beginners-guide-python.md @@ -0,0 +1,334 @@ +--- +layout: global +title: Beginner's Guide for Python users +description: Beginner's Guide for Python users +--- + + +* This will become a table of contents (this text will be scraped). +{:toc} + +
    + +## Introduction + +SystemML enables flexible, scalable machine learning. This flexibility is achieved through the specification of a high-level declarative machine learning language that comes in two flavors, +one with an R-like syntax (DML) and one with a Python-like syntax (PyDML). + +Algorithm scripts written in DML and PyDML can be run on Hadoop, on Spark, or in Standalone mode. +No script modifications are required to change between modes. SystemML automatically performs advanced optimizations +based on data and cluster characteristics, so the need to manually tweak algorithms is largely reduced or eliminated. +To learn more about DML and PyDML, we recommend that you read the [Beginner's Guide to DML and PyDML](https://apache.github.io/incubator-systemml/beginners-guide-to-dml-and-pydml.html). + +For the convenience of Python users, SystemML exposes several language-level APIs that allow Python users to use SystemML +and its algorithms without the need to know DML or PyDML. We explain these APIs in the sections below with example use cases. + +## Download & Setup + +Before you get started with SystemML, make sure that your environment is set up and ready to go. + +### Install Java (Java 8 is required) and Apache Spark + +If you already have an Apache Spark installation, you can skip this step. + 
    +
    +```bash +/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" +brew tap caskroom/cask +brew install Caskroom/cask/java +brew install apache-spark +``` +
    +
    +```bash +ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Linuxbrew/install/master/install)" +brew tap caskroom/cask +brew install Caskroom/cask/java +brew install apache-spark +``` +
    +
    + +### Install SystemML + +#### Step 1: Install the SystemML Python package + +```bash +pip install SystemML +``` + +#### Step 2: Download the SystemML Java binaries + +The SystemML Python package downloads the corresponding Java binaries (along with the algorithm scripts) and places them +into the package's installation directory. To find the location of the downloaded Java binaries, use the following command: + +```bash +python -c 'import imp; import os; print os.path.join(imp.find_module("SystemML")[1], "SystemML-java")' +``` + +#### Step 3: (Optional but recommended) Set the SYSTEMML_HOME environment variable 
    +
    +```bash +SYSTEMML_HOME=`python -c 'import imp; import os; print os.path.join(imp.find_module("SystemML")[1], "SystemML-java")'` +# If you are using zsh or ksh or csh, append it to ~/.zshrc or ~/.profile or ~/.login respectively. +echo '' >> ~/.bashrc +echo 'export SYSTEMML_HOME='$SYSTEMML_HOME >> ~/.bashrc +``` +
    +
    +```bash +SYSTEMML_HOME=`python -c 'import imp; import os; print os.path.join(imp.find_module("SystemML")[1], "SystemML-java")'` +# If you are using zsh or ksh or csh, append it to ~/.zshrc or ~/.profile or ~/.login respectively. +echo '' >> ~/.bashrc +echo 'export SYSTEMML_HOME='$SYSTEMML_HOME >> ~/.bashrc +``` +
    +
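To confirm that the variable is set correctly, a quick check such as the following can be run from a new terminal (a minimal sketch; it assumes `~/.bashrc` has been re-read, e.g. by opening a fresh shell):

```python
import os

# SYSTEMML_HOME should point at the SystemML-java directory found in Step 2
home = os.environ.get('SYSTEMML_HOME', '')
print(home)
# SystemML.jar is expected inside that directory; it is passed to pyspark below
print(os.path.exists(os.path.join(home, 'SystemML.jar')))
```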
    + +Note: the user is free to use the prepackaged Java binaries, +download them from the [SystemML website](http://systemml.apache.org/download.html), +or build them from [source](https://github.com/apache/incubator-systemml). + +### Start the PySpark shell + 
    +
    +```bash +pyspark --master local[*] --driver-class-path $SYSTEMML_HOME"/SystemML.jar" +``` +
    +
    +```bash +pyspark --master local[*] --driver-class-path $SYSTEMML_HOME"/SystemML.jar" +``` +
    +
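Once the shell is up, a quick sanity check (a minimal sketch, assuming the `SystemML` package installed above and the `--driver-class-path` setting shown here) is to run a tiny matrix computation before moving on:

```python
import SystemML as sml
import numpy as np

sml.setSparkContext(sc)              # sc is created automatically by the pyspark shell
m = sml.matrix(np.ones((2, 2)))      # lazily evaluated 2x2 matrix of ones
print(m.sum(axis=1).toNumPyArray())  # expected: a 2x1 array of 2.0 values
```

If this prints a 2x1 array of 2.0 values, the Python package and the SystemML jar are wired up correctly.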
    + +## Matrix operations + +To get started with SystemML, let's try a few elementary matrix operations: + +```python +import SystemML as sml +import numpy as np +sml.setSparkContext(sc) +m1 = sml.matrix(np.ones((3,3)) + 2) +m2 = sml.matrix(np.ones((3,3)) + 3) +m2 = m1 * (m2 + m1) +m4 = 1.0 - m2 +m4.sum(axis=1).toNumPyArray() +``` + +Output: + +```bash +array([[-60.], + [-60.], + [-60.]]) +``` + +Let us now write a simple script to train a [linear regression](https://apache.github.io/incubator-systemml/algorithms-regression.html#linear-regression) +model: $ \beta = solve(X^T X, X^T y) $. For simplicity, we will use the direct-solve method and ignore the regularization parameter as well as the intercept. + +```python +import numpy as np +from sklearn import datasets +import SystemML as sml +from pyspark.sql import SQLContext +# Load the diabetes dataset +diabetes = datasets.load_diabetes() +# Use only one feature +diabetes_X = diabetes.data[:, np.newaxis, 2] +# Split the data into training/testing sets +X_train = diabetes_X[:-20] +X_test = diabetes_X[-20:] +# Split the targets into training/testing sets +y_train = diabetes.target[:-20] +y_test = diabetes.target[-20:] +# Train Linear Regression model +sml.setSparkContext(sc) +X = sml.matrix(X_train) +y = sml.matrix(y_train) +A = X.transpose().dot(X) +b = X.transpose().dot(y) +beta = sml.solve(A, b).toNumPyArray() +y_predicted = X_test.dot(beta) +print('Residual sum of squares: %.2f' % np.mean((y_predicted - y_test) ** 2)) +``` + +Output: + +```bash +Residual sum of squares: 25282.12 +``` + +We can reduce the residual error by adding an intercept and a regularization parameter. To do so, we will use the `mllearn` API described in the next section. + +## Invoke SystemML's algorithms + +SystemML also exposes a subpackage `mllearn`. This subpackage allows Python users to invoke SystemML algorithms +using the Scikit-learn or MLPipeline APIs. + +### Scikit-learn interface + +In the example below, we invoke SystemML's [Linear Regression](https://apache.github.io/incubator-systemml/algorithms-regression.html#linear-regression) +algorithm. + +```python +import numpy as np +from sklearn import datasets +from SystemML.mllearn import LinearRegression +from pyspark.sql import SQLContext +sqlCtx = SQLContext(sc) +# Load the diabetes dataset +diabetes = datasets.load_diabetes() +# Use only one feature +diabetes_X = diabetes.data[:, np.newaxis, 2] +# Split the data into training/testing sets +X_train = diabetes_X[:-20] +X_test = diabetes_X[-20:] +# Split the targets into training/testing sets +y_train = diabetes.target[:-20] +y_test = diabetes.target[-20:] +# Create linear regression object +regr = LinearRegression(sqlCtx, fit_intercept=True, C=1, solver='direct-solve') +# Train the model using the training sets +regr.fit(X_train, y_train) +y_predicted = regr.predict(X_test) +print('Residual sum of squares: %.2f' % np.mean((y_predicted - y_test) ** 2)) +``` + +Output: + +```bash +Residual sum of squares: 6991.17 +``` + +As expected, adding an intercept and a regularizer makes the residual error drop significantly. + +Here is another example where we invoke SystemML's [Logistic Regression](https://apache.github.io/incubator-systemml/algorithms-classification.html#multinomial-logistic-regression) +algorithm on the digits dataset. 
+ +```python +# Scikit-learn way +from sklearn import datasets, neighbors +from SystemML.mllearn import LogisticRegression +from pyspark.sql import SQLContext +sqlCtx = SQLContext(sc) +digits = datasets.load_digits() +X_digits = digits.data +y_digits = digits.target + 1 +n_samples = len(X_digits) +X_train = X_digits[:.9 * n_samples] +y_train = y_digits[:.9 * n_samples] +X_test = X_digits[.9 * n_samples:] +y_test = y_digits[.9 * n_samples:] +logistic = LogisticRegression(sqlCtx) +print('LogisticRegression score: %f' % logistic.fit(X_train, y_train).score(X_test, y_test)) +``` + +### Passing PySpark DataFrame + +To train the above algorithm on larger dataset, we can load the dataset into DataFrame and pass it to the `fit` method: + +```python +from sklearn import datasets, neighbors +from SystemML.mllearn import LogisticRegression +from pyspark.sql import SQLContext +import SystemML as sml +sqlCtx = SQLContext(sc) +digits = datasets.load_digits() +X_digits = digits.data +y_digits = digits.target + 1 +n_samples = len(X_digits) +# Split the data into training/testing sets and convert to PySpark DataFrame +df_train = sml.convertToLabeledDF(sqlContext, X_digits[:.9 * n_samples], y_digits[:.9 * n_samples]) +X_test = X_digits[.9 * n_samples:] +y_test = y_digits[.9 * n_samples:] +logistic = LogisticRegression(sqlCtx) +print('LogisticRegression score: %f' % logistic.fit(df_train).score(X_test, y_test)) +``` + +### MLPipeline interface + +In the below example, we demonstrate how the same `LogisticRegression` class can allow SystemML to fit seamlessly into +large data pipelines. + +```python +# MLPipeline way +from pyspark.ml import Pipeline +from SystemML.mllearn import LogisticRegression +from pyspark.ml.feature import HashingTF, Tokenizer +from pyspark.sql import SQLContext +sqlCtx = SQLContext(sc) +training = sqlCtx.createDataFrame([ + (0L, "a b c d e spark", 1.0), + (1L, "b d", 2.0), + (2L, "spark f g h", 1.0), + (3L, "hadoop mapreduce", 2.0), + (4L, "b spark who", 1.0), + (5L, "g d a y", 2.0), + (6L, "spark fly", 1.0), + (7L, "was mapreduce", 2.0), + (8L, "e spark program", 1.0), + (9L, "a e c l", 2.0), + (10L, "spark compile", 1.0), + (11L, "hadoop software", 2.0) +], ["id", "text", "label"]) +tokenizer = Tokenizer(inputCol="text", outputCol="words") +hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20) +lr = LogisticRegression(sqlCtx) +pipeline = Pipeline(stages=[tokenizer, hashingTF, lr]) +model = pipeline.fit(training) +test = sqlCtx.createDataFrame([ + (12L, "spark i j k"), + (13L, "l m n"), + (14L, "mapreduce spark"), + (15L, "apache hadoop")], ["id", "text"]) +prediction = model.transform(test) +prediction.show() +``` + +## Invoking DML/PyDML scripts using MLContext + +TODO: This is work in progress. 
+ +```python +from sklearn import datasets, neighbors +from SystemML.mllearn import LogisticRegression +from pyspark.sql import DataFrame, SQLContext +import SystemML as sml +import pandas as pd +import os +sqlCtx = SQLContext(sc) +digits = datasets.load_digits() +X_digits = digits.data +y_digits = digits.target + 1 +n_samples = len(X_digits) +# Split the data into training/testing sets and convert to PySpark DataFrame +X_df = sqlCtx.createDataFrame(pd.DataFrame(X_digits[:.9 * n_samples])) +y_df = sqlCtx.createDataFrame(pd.DataFrame(y_digits[:.9 * n_samples])) +ml = sml.MLContext(sc) +script = os.path.join(os.environ['SYSTEMML_HOME'], 'scripts', 'algorithms', 'MultiLogReg.dml') +script = sml.dml(script).input(X=X_df, Y_vec=y_df).out("B_out") +# .input($X=' ', $Y=' ', $B=' ') +beta = ml.execute(script).getNumPyArray('B_out') +``` diff --git a/docs/index.md b/docs/index.md index 738e5258155..3fcece6aa38 100644 --- a/docs/index.md +++ b/docs/index.md @@ -68,6 +68,8 @@ DML is a high-level R-like declarative language for machine learning. PyDML is a high-level Python-like declarative language for machine learning. * [Beginner's Guide to DML and PyDML](beginners-guide-to-dml-and-pydml) - An introduction to the basics of DML and PyDML. +* [Beginner's Guide for Python users](beginners-guide-python) - +Beginner's Guide for Python users. ## ML Algorithms diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java index dbc8f5da0fa..605ba952920 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java @@ -24,6 +24,8 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.rdd.RDD; import org.apache.spark.sql.DataFrame; +import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.LocalVariableMap; import org.apache.sysml.runtime.controlprogram.caching.CacheException; @@ -37,7 +39,10 @@ import org.apache.sysml.runtime.instructions.cp.IntObject; import org.apache.sysml.runtime.instructions.cp.ScalarObject; import org.apache.sysml.runtime.instructions.cp.StringObject; +import org.apache.sysml.runtime.matrix.MatrixCharacteristics; +import org.apache.sysml.runtime.matrix.MatrixDimensionsMetaData; import org.apache.sysml.runtime.matrix.data.FrameBlock; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.util.DataConverter; import scala.Tuple1; @@ -115,7 +120,21 @@ public Data getData(String outputName) { */ public MatrixObject getMatrixObject(String outputName) { Data data = getData(outputName); - if (!(data instanceof MatrixObject)) { + if(data instanceof ScalarObject) { + double val = getDouble(outputName); + MatrixObject one_X_one_mo = new MatrixObject(ValueType.DOUBLE, " ", new MatrixDimensionsMetaData(new MatrixCharacteristics(1, 1, OptimizerUtils.DEFAULT_BLOCKSIZE, OptimizerUtils.DEFAULT_BLOCKSIZE, 1))); + MatrixBlock mb = new MatrixBlock(1, 1, false); + mb.allocateDenseBlock(); + mb.setValue(0, 0, val); + try { + one_X_one_mo.acquireModify(mb); + one_X_one_mo.release(); + } catch (CacheException e) { + throw new RuntimeException(e); + } + return one_X_one_mo; + } + else if (!(data instanceof MatrixObject)) { throw new MLContextException("Variable '" + outputName + "' not a matrix"); } MatrixObject mo = (MatrixObject) data; diff --git 
a/src/main/java/org/apache/sysml/api/python/SystemML.py b/src/main/java/org/apache/sysml/api/python/SystemML.py index 689403ea883..3b8ae961b5c 100644 --- a/src/main/java/org/apache/sysml/api/python/SystemML.py +++ b/src/main/java/org/apache/sysml/api/python/SystemML.py @@ -27,18 +27,7 @@ from pyspark.context import SparkContext from pyspark.sql import DataFrame, SQLContext from pyspark.rdd import RDD -import numpy as np -import pandas as pd -import sklearn as sk -from sklearn import metrics -from pyspark.ml.feature import VectorAssembler -from pyspark.mllib.linalg import Vectors -import sys -from pyspark.ml import Estimator, Model -from scipy.sparse import spmatrix -from scipy.sparse import coo_matrix -SUPPORTED_TYPES = (np.ndarray, pd.DataFrame, spmatrix) class MLContext(object): @@ -244,10 +233,6 @@ def getDF(self, sqlContext, varName): return df except Py4JJavaError: traceback.print_exc() - - def getPandasDF(self, sqlContext, varName): - df = self.toDF(sqlContext, varName).sort('ID').drop('ID') - return df.toPandas() def getMLMatrix(self, sqlContext, varName): raise Exception('Not supported in Python MLContext') @@ -265,218 +250,3 @@ def getStringRDD(self, varName, format): #except Py4JJavaError: # traceback.print_exc() -def getNumCols(numPyArr): - if numPyArr.ndim == 1: - return 1 - else: - return numPyArr.shape[1] - -def convertToMatrixBlock(sc, src): - if isinstance(src, spmatrix): - src = coo_matrix(src, dtype=np.float64) - numRows = src.shape[0] - numCols = src.shape[1] - data = src.data - row = src.row.astype(np.int32) - col = src.col.astype(np.int32) - nnz = len(src.col) - buf1 = bytearray(data.tostring()) - buf2 = bytearray(row.tostring()) - buf3 = bytearray(col.tostring()) - return sc._jvm.org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt.convertSciPyCOOToMB(buf1, buf2, buf3, numRows, numCols, nnz) - elif isinstance(sc, SparkContext): - src = np.asarray(src) - numCols = getNumCols(src) - numRows = src.shape[0] - arr = src.ravel().astype(np.float64) - buf = bytearray(arr.tostring()) - return sc._jvm.org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt.convertPy4JArrayToMB(buf, numRows, numCols) - else: - raise TypeError('sc needs to be of type SparkContext') # TODO: We can generalize this by creating py4j gateway ourselves - - -def convertToNumpyArr(sc, mb): - if isinstance(sc, SparkContext): - numRows = mb.getNumRows() - numCols = mb.getNumColumns() - buf = sc._jvm.org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt.convertMBtoPy4JDenseArr(mb) - return np.frombuffer(buf, count=numRows*numCols, dtype=np.float64) - else: - raise TypeError('sc needs to be of type SparkContext') # TODO: We can generalize this by creating py4j gateway ourselves - -def convertToPandasDF(X): - if not isinstance(X, pd.DataFrame): - return pd.DataFrame(X, columns=['C' + str(i) for i in range(getNumCols(X))]) - return X - -def tolist(inputCols): - return list(inputCols) - -def assemble(sqlCtx, pdf, inputCols, outputCol): - tmpDF = sqlCtx.createDataFrame(pdf, tolist(pdf.columns)) - assembler = VectorAssembler(inputCols=tolist(inputCols), outputCol=outputCol) - return assembler.transform(tmpDF) - -class mllearn: - class BaseSystemMLEstimator(Estimator): - # TODO: Allow users to set featuresCol (with default 'features') and labelCol (with default 'label') - - # Returns a model after calling fit(df) on Estimator object on JVM - def _fit(self, X): - if hasattr(X, '_jdf') and 'features' in X.columns and 'label' in X.columns: - self.model = 
self.estimator.fit(X._jdf) - return self - else: - raise Exception('Incorrect usage: Expected dataframe as input with features/label as columns') - - # Returns a model after calling fit(X:MatrixBlock, y:MatrixBlock) on Estimator object on JVM - def fit(self, X, y=None, params=None): - if y is None: - return self._fit(X) - elif y is not None and isinstance(X, SUPPORTED_TYPES): - if self.transferUsingDF: - pdfX = convertToPandasDF(X) - pdfY = convertToPandasDF(y) - if getNumCols(pdfY) != 1: - raise Exception('y should be a column vector') - if pdfX.shape[0] != pdfY.shape[0]: - raise Exception('Number of rows of X and y should match') - colNames = pdfX.columns - pdfX['label'] = pdfY[pdfY.columns[0]] - df = assemble(self.sqlCtx, pdfX, colNames, 'features').select('features', 'label') - self.model = self.estimator.fit(df._jdf) - else: - numColsy = getNumCols(y) - if numColsy != 1: - raise Exception('Expected y to be a column vector') - self.model = self.estimator.fit(convertToMatrixBlock(self.sc, X), convertToMatrixBlock(self.sc, y)) - if self.setOutputRawPredictionsToFalse: - self.model.setOutputRawPredictions(False) - return self - else: - raise Exception('Unsupported input type') - - def transform(self, X): - return self.predict(X) - - # Returns either a DataFrame or MatrixBlock after calling transform(X:MatrixBlock, y:MatrixBlock) on Model object on JVM - def predict(self, X): - if isinstance(X, SUPPORTED_TYPES): - if self.transferUsingDF: - pdfX = convertToPandasDF(X) - df = assemble(self.sqlCtx, pdfX, pdfX.columns, 'features').select('features') - retjDF = self.model.transform(df._jdf) - retDF = DataFrame(retjDF, self.sqlCtx) - retPDF = retDF.sort('ID').select('prediction').toPandas() - if isinstance(X, np.ndarray): - return retPDF.as_matrix().flatten() - else: - return retPDF - else: - retNumPy = convertToNumpyArr(self.sc, self.model.transform(convertToMatrixBlock(self.sc, X))) - if isinstance(X, np.ndarray): - return retNumPy - else: - return retNumPy # TODO: Convert to Pandas - elif hasattr(X, '_jdf'): - if 'features' in X.columns: - # No need to assemble as input DF is likely coming via MLPipeline - df = X - else: - assembler = VectorAssembler(inputCols=X.columns, outputCol='features') - df = assembler.transform(X) - retjDF = self.model.transform(df._jdf) - retDF = DataFrame(retjDF, self.sqlCtx) - # Return DF - return retDF.sort('ID') - else: - raise Exception('Unsupported input type') - - class BaseSystemMLClassifier(BaseSystemMLEstimator): - - # Scores the predicted value with ground truth 'y' - def score(self, X, y): - return metrics.accuracy_score(y, self.predict(X)) - - class BaseSystemMLRegressor(BaseSystemMLEstimator): - - # Scores the predicted value with ground truth 'y' - def score(self, X, y): - return metrics.r2_score(y, self.predict(X), multioutput='variance_weighted') - - - # Or we can create new Python project with package structure - class LogisticRegression(BaseSystemMLClassifier): - - # See https://apache.github.io/incubator-systemml/algorithms-reference for usage - def __init__(self, sqlCtx, penalty='l2', fit_intercept=True, max_iter=100, max_inner_iter=0, tol=0.000001, C=1.0, solver='newton-cg', transferUsingDF=False): - self.sqlCtx = sqlCtx - self.sc = sqlCtx._sc - self.uid = "logReg" - self.estimator = self.sc._jvm.org.apache.sysml.api.ml.LogisticRegression(self.uid, self.sc._jsc.sc()) - self.estimator.setMaxOuterIter(max_iter) - self.estimator.setMaxInnerIter(max_inner_iter) - if C <= 0: - raise Exception('C has to be positive') - reg = 1.0 / C - 
self.estimator.setRegParam(reg) - self.estimator.setTol(tol) - self.estimator.setIcpt(int(fit_intercept)) - self.transferUsingDF = transferUsingDF - self.setOutputRawPredictionsToFalse = True - if penalty != 'l2': - raise Exception('Only l2 penalty is supported') - if solver != 'newton-cg': - raise Exception('Only newton-cg solver supported') - - class LinearRegression(BaseSystemMLRegressor): - - # See https://apache.github.io/incubator-systemml/algorithms-reference for usage - def __init__(self, sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, solver='newton-cg', transferUsingDF=False): - self.sqlCtx = sqlCtx - self.sc = sqlCtx._sc - self.uid = "lr" - if solver == 'newton-cg' or solver == 'direct-solve': - self.estimator = self.sc._jvm.org.apache.sysml.api.ml.LinearRegression(self.uid, self.sc._jsc.sc(), solver) - else: - raise Exception('Only newton-cg solver supported') - self.estimator.setMaxIter(max_iter) - if C <= 0: - raise Exception('C has to be positive') - reg = 1.0 / C - self.estimator.setRegParam(reg) - self.estimator.setTol(tol) - self.estimator.setIcpt(int(fit_intercept)) - self.transferUsingDF = transferUsingDF - self.setOutputRawPredictionsToFalse = False - - - class SVM(BaseSystemMLClassifier): - - # See https://apache.github.io/incubator-systemml/algorithms-reference for usage - def __init__(self, sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=False, transferUsingDF=False): - self.sqlCtx = sqlCtx - self.sc = sqlCtx._sc - self.uid = "svm" - self.estimator = self.sc._jvm.org.apache.sysml.api.ml.SVM(self.uid, self.sc._jsc.sc(), is_multi_class) - self.estimator.setMaxIter(max_iter) - if C <= 0: - raise Exception('C has to be positive') - reg = 1.0 / C - self.estimator.setRegParam(reg) - self.estimator.setTol(tol) - self.estimator.setIcpt(int(fit_intercept)) - self.transferUsingDF = transferUsingDF - self.setOutputRawPredictionsToFalse = False - - class NaiveBayes(BaseSystemMLClassifier): - - # See https://apache.github.io/incubator-systemml/algorithms-reference for usage - def __init__(self, sqlCtx, laplace=1.0, transferUsingDF=False): - self.sqlCtx = sqlCtx - self.sc = sqlCtx._sc - self.uid = "nb" - self.estimator = self.sc._jvm.org.apache.sysml.api.ml.NaiveBayes(self.uid, self.sc._jsc.sc()) - self.estimator.setLaplace(laplace) - self.transferUsingDF = transferUsingDF - self.setOutputRawPredictionsToFalse = False \ No newline at end of file diff --git a/src/main/python/MANIFEST.in b/src/main/python/MANIFEST.in new file mode 100644 index 00000000000..a18526374e7 --- /dev/null +++ b/src/main/python/MANIFEST.in @@ -0,0 +1,29 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- +include SystemML/SystemML-java/LICENSE +include SystemML/SystemML-java/SystemML-config.xml +include SystemML/SystemML-java/NOTICE +include SystemML/SystemML-java/SystemML.jar +include SystemML/SystemML-java/DISCLAIMER +include SystemML/SystemML-java/scripts/sparkDML.sh +recursive-include SystemML/SystemML-java/scripts/algorithms * +recursive-include SystemML/SystemML-java/scripts/datagen * +recursive-include SystemML/SystemML-java/scripts/utils * \ No newline at end of file diff --git a/src/main/python/SystemML/__init__.py b/src/main/python/SystemML/__init__.py new file mode 100644 index 00000000000..02a940bb4f6 --- /dev/null +++ b/src/main/python/SystemML/__init__.py @@ -0,0 +1,29 @@ +#!/usr/bin/python +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +from .mlcontext import * +from .defmatrix import * +from .converters import * + +__all__ = mlcontext.__all__ +__all__ += defmatrix.__all__ +__all__ += converters.__all__ \ No newline at end of file diff --git a/src/main/python/SystemML/converters.py b/src/main/python/SystemML/converters.py new file mode 100644 index 00000000000..9588bec38db --- /dev/null +++ b/src/main/python/SystemML/converters.py @@ -0,0 +1,100 @@ +#!/usr/bin/python +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +from pyspark.context import SparkContext +from pyspark.sql import DataFrame, SQLContext +from pyspark.rdd import RDD +import numpy as np +import pandas as pd +import sklearn as sk + +from scipy.sparse import spmatrix +from scipy.sparse import coo_matrix + +SUPPORTED_TYPES = (np.ndarray, pd.DataFrame, spmatrix) + +def getNumCols(numPyArr): + if numPyArr.ndim == 1: + return 1 + else: + return numPyArr.shape[1] + +def convertToLabeledDF(sqlCtx, X, y=None): + from pyspark.ml.feature import VectorAssembler + if y is not None: + pd1 = pd.DataFrame(X) + pd2 = pd.DataFrame(y, columns=['label']) + pdf = pd.concat([pd1, pd2], axis=1) + inputColumns = ['C' + str(i) for i in pd1.columns] + outputColumns = inputColumns + ['label'] + else: + pdf = pd.DataFrame(X) + inputColumns = ['C' + str(i) for i in pdf.columns] + outputColumns = inputColumns + assembler = VectorAssembler(inputCols=inputColumns, outputCol='features') + out = assembler.transform(sqlCtx.createDataFrame(pdf, outputColumns)) + if y is not None: + return out.select('features', 'label') + else: + return out.select('features') + + +def convertToMatrixBlock(sc, src): + if isinstance(src, spmatrix): + src = coo_matrix(src, dtype=np.float64) + numRows = src.shape[0] + numCols = src.shape[1] + data = src.data + row = src.row.astype(np.int32) + col = src.col.astype(np.int32) + nnz = len(src.col) + buf1 = bytearray(data.tostring()) + buf2 = bytearray(row.tostring()) + buf3 = bytearray(col.tostring()) + return sc._jvm.org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt.convertSciPyCOOToMB(buf1, buf2, buf3, numRows, numCols, nnz) + elif isinstance(sc, SparkContext): + src = np.asarray(src) + numCols = getNumCols(src) + numRows = src.shape[0] + arr = src.ravel().astype(np.float64) + buf = bytearray(arr.tostring()) + return sc._jvm.org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt.convertPy4JArrayToMB(buf, numRows, numCols) + else: + raise TypeError('sc needs to be of type SparkContext') # TODO: We can generalize this by creating py4j gateway ourselves + + +def convertToNumpyArr(sc, mb): + if isinstance(sc, SparkContext): + numRows = mb.getNumRows() + numCols = mb.getNumColumns() + buf = sc._jvm.org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt.convertMBtoPy4JDenseArr(mb) + return np.frombuffer(buf, count=numRows*numCols, dtype=np.float64).reshape((numRows, numCols)) + else: + raise TypeError('sc needs to be of type SparkContext') # TODO: We can generalize this by creating py4j gateway ourselves + + +def convertToPandasDF(X): + if not isinstance(X, pd.DataFrame): + return pd.DataFrame(X, columns=['C' + str(i) for i in range(getNumCols(X))]) + return X + +__all__ = [ 'getNumCols', 'convertToMatrixBlock', 'convertToNumpyArr', 'convertToPandasDF', 'SUPPORTED_TYPES' , 'convertToLabeledDF'] diff --git a/src/main/python/SystemML/defmatrix.py b/src/main/python/SystemML/defmatrix.py new file mode 100644 index 00000000000..7e2c4533d39 --- /dev/null +++ b/src/main/python/SystemML/defmatrix.py @@ -0,0 +1,295 @@ +#!/usr/bin/python +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +import numpy as np + +from . import pydml, MLContext +from .converters import * +from pyspark import SparkContext, RDD +from pyspark.sql import DataFrame, SQLContext + +def setSparkContext(sc): + """ + Before using the matrix, the user needs to invoke this function. + + Parameters + ---------- + sc: SparkContext + SparkContext + """ + matrix.ml = MLContext(sc) + matrix.sc = sc + +def checkIfMLContextIsSet(): + if matrix.ml is None: + raise Exception('Expected setSparkContext(sc) to be called.') + +class DMLOp(object): + def __init__(self, inputs, dml=None): + self.inputs = inputs + self.dml = dml + + def _visit(self, execute=True): + matrix.dml = matrix.dml + self.dml + + +def reset(): + for m in matrix.visited: + m.visited = False + matrix.visited = [] + +def binaryOp(lhs, rhs, opStr): + inputs = [] + if isinstance(lhs, matrix): + lhsStr = lhs.ID + inputs = [lhs] + elif isinstance(lhs, float) or isinstance(lhs, int): + lhsStr = str(lhs) + else: + raise TypeError('Incorrect type') + if isinstance(rhs, matrix): + rhsStr = rhs.ID + inputs = inputs + [rhs] + elif isinstance(rhs, float) or isinstance(rhs, int): + rhsStr = str(rhs) + else: + raise TypeError('Incorrect type') + dmlOp = DMLOp(inputs) + out = matrix(None, op=dmlOp) + dmlOp.dml = [out.ID, ' = ', lhsStr, opStr, rhsStr, '\n'] + return out + +def binaryMatrixFunction(X, Y, fnName): + if not isinstance(X, matrix) or not isinstance(Y, matrix): + raise TypeError('Incorrect input type. Expected matrix type') + inputs = [X, Y] + dmlOp = DMLOp(inputs) + out = matrix(None, op=dmlOp) + dmlOp.dml = [out.ID, ' = ', fnName,'(', X.ID, ', ', Y.ID, ')\n'] + return out + +def solve(A, b): + return binaryMatrixFunction(A, b, 'solve') + + +def eval(outputs, outputDF=False, execute=True): + """ + Executes the unevaluated DML script and computes the matrices specified by outputs. + + Parameters + ---------- + outputs: list of matrices + outputDF: back the data of matrix as PySpark DataFrame + """ + checkIfMLContextIsSet() + reset() + matrix.dml = [] + matrix.script = pydml('') + if isinstance(outputs, matrix): + outputs = [ outputs ] + elif not isinstance(outputs, list): + raise TypeError('Incorrect input type') + for m in outputs: + m.output = True + m._visit(execute=execute) + if not execute: + return ''.join(matrix.dml) + matrix.script.scriptString = ''.join(matrix.dml) + results = matrix.ml.execute(matrix.script) + for m in outputs: + if outputDF: + m.data = results.getDataFrame(m.ID) + else: + m.data = results.getNumPyArray(m.ID) + +# Instead of inheriting from np.matrix +class matrix(object): + systemmlVarID = 0 + dml = [] + script = None + ml = None + visited = [] + def __init__(self, data, op=None): + """ + Constructs a lazy matrix + + Parameters + ---------- + data: NumPy ndarray, Pandas DataFrame, scipy sparse matrix or PySpark DataFrame. 
(data cannot be None for external users, 'data=None' is used internally for lazy evaluation). + """ + checkIfMLContextIsSet() + self.visited = False + matrix.systemmlVarID += 1 + self.output = False + self.ID = 'mVar' + str(matrix.systemmlVarID) + if isinstance(data, SUPPORTED_TYPES): + self.data = data + elif hasattr(data, '_jdf'): + self.data = data + elif data is None and op is not None: + self.data = None + # op refers to the node of Abstract Syntax Tree created internally for lazy evaluation + self.op = op + else: + raise TypeError('Unsupported input type') + + def eval(self, outputDF=False): + eval([self], outputDF=False) + + def toPandas(self): + if self.data is None: + self.eval() + return convertToPandasDF(self.data) + + def toNumPyArray(self): + if self.data is None: + self.eval() + if isinstance(self.data, DataFrame): + self.data = self.data.toPandas().as_matrix() + # Always keep default format as NumPy array if possible + return self.data + + def toDataFrame(self): + if self.data is None: + self.eval(outputDF=True) + if not isinstance(self.data, DataFrame): + if MLResults.sqlContext is None: + MLResults.sqlContext = SQLContext(matrix.sc) + self.data = sqlContext.createDataFrame(self.toPandas()) + return self.data + + def _visit(self, execute=True): + if self.visited: + return self + self.visited = True + # for cleanup + matrix.visited = matrix.visited + [ self ] + if self.data is not None: + matrix.dml = matrix.dml + [ self.ID, ' = load(\" \", format=\"csv\")\n'] + if isinstance(self.data, DataFrame) and execute: + matrix.script.input(self.ID, self.data) + elif execute: + matrix.script.input(self.ID, convertToMatrixBlock(matrix.sc, self.data)) + return self + elif self.op is not None: + for m in self.op.inputs: + m._visit(execute=execute) + self.op._visit(execute=execute) + else: + raise Exception('Expected either op or data to be set') + if self.data is None and self.output: + matrix.dml = matrix.dml + ['save(', self.ID, ', \" \")\n'] + if execute: + matrix.script.out(self.ID) + return self + + def __repr__(self): + if self.data is None: + print('# This matrix (' + self.ID + ') is backed by below given PyDML script (which is not yet evaluated). To fetch the data of this matrix, invoke toNumPyArray() or toDataFrame() or toPandas() methods.\n' + eval([self], execute=False)) + elif isinstance(self.data, DataFrame): + print('# This matrix (' + self.ID + ') is backed by PySpark DataFrame. To fetch the DataFrame, invoke toDataFrame() method.') + else: + print('# This matrix (' + self.ID + ') is backed by NumPy array. 
To fetch the NumPy array, invoke toNumPyArray() method.') + return '' + + def __add__(self, other): + return binaryOp(self, other, ' + ') + + def __sub__(self, other): + return binaryOp(self, other, ' - ') + + def __mul__(self, other): + return binaryOp(self, other, ' * ') + + def __floordiv__(self, other): + return binaryOp(self, other, ' // ') + + def __div__(self, other): + return binaryOp(self, other, ' / ') + + def __mod__(self, other): + return binaryOp(self, other, ' % ') + + def __pow__(self, other): + return binaryOp(self, other, ' ** ') + + def __radd__(self, other): + return binaryOp(other, self, ' + ') + + def __rsub__(self, other): + return binaryOp(other, self, ' - ') + + def __rmul__(self, other): + return binaryOp(other, self, ' * ') + + def __rfloordiv__(self, other): + return binaryOp(other, self, ' // ') + + def __rdiv__(self, other): + return binaryOp(other, self, ' / ') + + def __rmod__(self, other): + return binaryOp(other, self, ' % ') + + def __rpow__(self, other): + return binaryOp(other, self, ' ** ') + + def sum(self, axis=None): + return self._aggFn('sum', axis) + + def mean(self, axis=None): + return self._aggFn('mean', axis) + + def max(self, axis=None): + return self._aggFn('max', axis) + + def min(self, axis=None): + return self._aggFn('min', axis) + + def argmin(self, axis=None): + return self._aggFn('argmin', axis) + + def argmax(self, axis=None): + return self._aggFn('argmax', axis) + + def cumsum(self, axis=None): + return self._aggFn('cumsum', axis) + + def transpose(self, axis=None): + return self._aggFn('transpose', axis) + + def trace(self, axis=None): + return self._aggFn('trace', axis) + + def _aggFn(self, fnName, axis): + dmlOp = DMLOp([self]) + out = matrix(None, op=dmlOp) + if axis is None: + dmlOp.dml = [out.ID, ' = ', fnName, '(', self.ID, ')\n'] + else: + dmlOp.dml = [out.ID, ' = ', fnName, '(', self.ID, ', axis=', str(axis) ,')\n'] + return out + + def dot(self, other): + return binaryMatrixFunction(self, other, 'dot') + +__all__ = [ 'setSparkContext', 'matrix', 'eval', 'solve'] \ No newline at end of file diff --git a/src/main/python/SystemML.py b/src/main/python/SystemML/mlcontext.py similarity index 84% rename from src/main/python/SystemML.py rename to src/main/python/SystemML/mlcontext.py index 7142a9ddb77..7ed277a8fa4 100644 --- a/src/main/python/SystemML.py +++ b/src/main/python/SystemML/mlcontext.py @@ -1,3 +1,4 @@ +#!/usr/bin/python #------------------------------------------------------------- # # Licensed to the Apache Software Foundation (ASF) under one @@ -20,20 +21,25 @@ #------------------------------------------------------------- import os -from py4j.java_gateway import JavaObject +try: + from py4j.java_gateway import JavaObject +except ImportError: + raise ImportError('Unable to import JavaObject from py4j.java_gateway. Hint: Make sure you are running with pyspark') + from pyspark import SparkContext import pyspark.mllib.common - +from pyspark.sql import DataFrame, SQLContext +from .converters import * def dml(scriptString): """ Create a dml script object based on a string. - + Parameters ---------- scriptString: string Can be a path to a dml script or a dml script itself. - + Returns ------- script: Script instance @@ -47,12 +53,12 @@ def dml(scriptString): def pydml(scriptString): """ Create a pydml script object based on a string. - + Parameters ---------- scriptString: string Can be a path to a pydml script or a pydml script itself. 
- + Returns ------- script: Script instance @@ -86,12 +92,12 @@ def _py2java(sc, obj): class Matrix(object): """ Wrapper around a Java Matrix object. - + Parameters ---------- javaMatrix: JavaObject A Java Matrix object as returned by calling `ml.execute().get()`. - + sc: SparkContext SparkContext """ @@ -105,7 +111,7 @@ def __repr__(self): def toDF(self): """ Convert the Matrix to a PySpark SQL DataFrame. - + Returns ------- df: PySpark SQL DataFrame @@ -122,22 +128,51 @@ def toDF(self): class MLResults(object): """ Wrapper around a Java ML Results object. - + Parameters ---------- results: JavaObject A Java MLResults object as returned by calling `ml.execute()`. - + sc: SparkContext SparkContext """ def __init__(self, results, sc): self._java_results = results self.sc = sc + try: + if MLResults.sqlContext is None: + MLResults.sqlContext = SQLContext(sc) + except AttributeError: + MLResults.sqlContext = SQLContext(sc) def __repr__(self): return "MLResults" + def getNumPyArray(self, *outputs): + """ + Parameters + ---------- + outputs: string, list of strings + Output variables as defined inside the DML script. + """ + outs = [convertToNumpyArr(self.sc, self._java_results.getMatrix(out).asBinaryBlockMatrix().getMatrixBlock()) for out in outputs] + if len(outs) == 1: + return outs[0] + return outs + + def getDataFrame(self, *outputs): + """ + Parameters + ---------- + outputs: string, list of strings + Output variables as defined inside the DML script. + """ + outs = [DataFrame(self._java_results.getDataFrame(out), MLResults.sqlContext) for out in outputs] + if len(outs) == 1: + return outs[0] + return outs + def get(self, *outputs): """ Parameters @@ -159,7 +194,7 @@ class Script(object): ---------- scriptString: string Can be either a file path to a DML script or a DML script itself. - + scriptType: string Script language, either "dml" for DML (R-like) or "pydml" for PyDML (Python-like). """ @@ -256,3 +291,6 @@ def execute(self, script): for val in script._output: script_java.out(val) return MLResults(self._ml.execute(script_java), self._sc) + + +__all__ = ['MLResults', 'MLContext', 'Script', 'dml', 'pydml'] \ No newline at end of file diff --git a/src/main/python/SystemML/mllearn/__init__.py b/src/main/python/SystemML/mllearn/__init__.py new file mode 100644 index 00000000000..69cab58eee9 --- /dev/null +++ b/src/main/python/SystemML/mllearn/__init__.py @@ -0,0 +1,25 @@ +#!/usr/bin/python +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +from .estimators import * + +__all__ = estimators.__all__ \ No newline at end of file diff --git a/src/main/python/SystemML/mllearn/estimators.py b/src/main/python/SystemML/mllearn/estimators.py new file mode 100644 index 00000000000..5d33d644c98 --- /dev/null +++ b/src/main/python/SystemML/mllearn/estimators.py @@ -0,0 +1,302 @@ +#!/usr/bin/python +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +from pyspark.context import SparkContext +from pyspark.sql import DataFrame, SQLContext +from pyspark.rdd import RDD +import numpy as np +import pandas as pd +import sklearn as sk +from pyspark.ml.feature import VectorAssembler +from pyspark.mllib.linalg import Vectors +from pyspark.ml import Estimator, Model + +from ..converters import * + +def assemble(sqlCtx, pdf, inputCols, outputCol): + tmpDF = sqlCtx.createDataFrame(pdf, list(pdf.columns)) + assembler = VectorAssembler(inputCols=list(inputCols), outputCol=outputCol) + return assembler.transform(tmpDF) + +class BaseSystemMLEstimator(Estimator): + featuresCol = 'features' + labelCol = 'label' + + def setFeaturesCol(self, colName): + """ + Sets the default column name for features of PySpark DataFrame. + + Parameters + ---------- + colName: column name for features (default: 'features') + """ + self.featuresCol = colName + + def setLabelCol(self, colName): + """ + Sets the default column name for features of PySpark DataFrame. 
+ + Parameters + ---------- + colName: column name for features (default: 'label') + """ + self.labelCol = colName + + # Returns a model after calling fit(df) on Estimator object on JVM + def _fit(self, X): + """ + Invokes the fit method on Estimator object on JVM if X is PySpark DataFrame + + Parameters + ---------- + X: PySpark DataFrame that contain the columns featuresCol (default: 'features') and labelCol (default: 'label') + """ + if hasattr(X, '_jdf') and self.featuresCol in X.columns and self.labelCol in X.columns: + self.model = self.estimator.fit(X._jdf) + return self + else: + raise Exception('Incorrect usage: Expected dataframe as input with features/label as columns') + + def fit(self, X, y=None, params=None): + """ + Invokes the fit method on Estimator object on JVM if X and y are on of the supported data types + + Parameters + ---------- + X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix + y: NumPy ndarray, Pandas DataFrame, scipy sparse matrix + """ + if y is None: + return self._fit(X) + elif y is not None and isinstance(X, SUPPORTED_TYPES) and isinstance(y, SUPPORTED_TYPES): + if self.transferUsingDF: + pdfX = convertToPandasDF(X) + pdfY = convertToPandasDF(y) + if getNumCols(pdfY) != 1: + raise Exception('y should be a column vector') + if pdfX.shape[0] != pdfY.shape[0]: + raise Exception('Number of rows of X and y should match') + colNames = pdfX.columns + pdfX[self.labelCol] = pdfY[pdfY.columns[0]] + df = assemble(self.sqlCtx, pdfX, colNames, self.featuresCol).select(self.featuresCol, self.labelCol) + self.model = self.estimator.fit(df._jdf) + else: + numColsy = getNumCols(y) + if numColsy != 1: + raise Exception('Expected y to be a column vector') + self.model = self.estimator.fit(convertToMatrixBlock(self.sc, X), convertToMatrixBlock(self.sc, y)) + if self.setOutputRawPredictionsToFalse: + self.model.setOutputRawPredictions(False) + return self + else: + raise Exception('Unsupported input type') + + def transform(self, X): + return self.predict(X) + + # Returns either a DataFrame or MatrixBlock after calling transform(X:MatrixBlock, y:MatrixBlock) on Model object on JVM + def predict(self, X): + """ + Invokes the transform method on Estimator object on JVM if X and y are on of the supported data types + + Parameters + ---------- + X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix or PySpark DataFrame + """ + if isinstance(X, SUPPORTED_TYPES): + if self.transferUsingDF: + pdfX = convertToPandasDF(X) + df = assemble(self.sqlCtx, pdfX, pdfX.columns, self.featuresCol).select(self.featuresCol) + retjDF = self.model.transform(df._jdf) + retDF = DataFrame(retjDF, self.sqlCtx) + retPDF = retDF.sort('ID').select('prediction').toPandas() + if isinstance(X, np.ndarray): + return retPDF.as_matrix().flatten() + else: + return retPDF + else: + retNumPy = convertToNumpyArr(self.sc, self.model.transform(convertToMatrixBlock(self.sc, X))) + if isinstance(X, np.ndarray): + return retNumPy + else: + return retNumPy # TODO: Convert to Pandas + elif hasattr(X, '_jdf'): + if self.featuresCol in X.columns: + # No need to assemble as input DF is likely coming via MLPipeline + df = X + else: + assembler = VectorAssembler(inputCols=X.columns, outputCol=self.featuresCol) + df = assembler.transform(X) + retjDF = self.model.transform(df._jdf) + retDF = DataFrame(retjDF, self.sqlCtx) + # Return DF + return retDF.sort('ID') + else: + raise Exception('Unsupported input type') + +class BaseSystemMLClassifier(BaseSystemMLEstimator): + + def score(self, X, y): + """ + Scores the predicted 
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+        y: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+        """
+        return sk.metrics.accuracy_score(y, self.predict(X))
+
+class BaseSystemMLRegressor(BaseSystemMLEstimator):
+
+    def score(self, X, y):
+        """
+        Scores the predicted values against the ground truth 'y' using the R2 (coefficient of determination) score
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+        y: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+        """
+        return sk.metrics.r2_score(y, self.predict(X), multioutput='variance_weighted')
+
+
+class LogisticRegression(BaseSystemMLClassifier):
+    def __init__(self, sqlCtx, penalty='l2', fit_intercept=True, max_iter=100, max_inner_iter=0, tol=0.000001, C=1.0, solver='newton-cg', transferUsingDF=False):
+        """
+        Performs both binomial and multinomial logistic regression.
+
+        Parameters
+        ----------
+        sqlCtx: PySpark SQLContext
+        penalty: Only 'l2' supported
+        fit_intercept: Specifies whether to add intercept or not (default: True)
+        max_iter: Maximum number of outer (Fisher scoring) iterations (default: 100)
+        max_inner_iter: Maximum number of inner (conjugate gradient) iterations, or 0 if no maximum limit provided (default: 0)
+        tol: Tolerance used in the convergence criterion (default: 0.000001)
+        C: 1/regularization parameter (default: 1.0)
+        solver: Only 'newton-cg' solver supported
+        """
+        self.sqlCtx = sqlCtx
+        self.sc = sqlCtx._sc
+        self.uid = "logReg"
+        self.estimator = self.sc._jvm.org.apache.sysml.api.ml.LogisticRegression(self.uid, self.sc._jsc.sc())
+        self.estimator.setMaxOuterIter(max_iter)
+        self.estimator.setMaxInnerIter(max_inner_iter)
+        if C <= 0:
+            raise Exception('C has to be positive')
+        reg = 1.0 / C
+        self.estimator.setRegParam(reg)
+        self.estimator.setTol(tol)
+        self.estimator.setIcpt(int(fit_intercept))
+        self.transferUsingDF = transferUsingDF
+        self.setOutputRawPredictionsToFalse = True
+        if penalty != 'l2':
+            raise Exception('Only the l2 penalty is supported')
+        if solver != 'newton-cg':
+            raise Exception('Only the newton-cg solver is supported')
+
+class LinearRegression(BaseSystemMLRegressor):
+
+    def __init__(self, sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, solver='newton-cg', transferUsingDF=False):
+        """
+        Performs linear regression to model the relationship between one numerical response variable and one or more explanatory (feature) variables.
+
+        Parameters
+        ----------
+        sqlCtx: PySpark SQLContext
+        fit_intercept: Specifies whether to add intercept or not (default: True)
+        max_iter: Maximum number of conjugate gradient iterations, or 0 if no maximum limit provided (default: 100)
+        tol: Tolerance used in the convergence criterion (default: 0.000001)
+        C: 1/regularization parameter (default: 1.0)
+        solver: Supports either 'newton-cg' or 'direct-solve' (default: 'newton-cg').
+                Depending on the size and the sparsity of the feature matrix, one or the other solver may be more efficient.
+                The 'direct-solve' solver is more efficient when the number of features is relatively small (m < 1000) and
+                the input matrix X is either tall or fairly dense; otherwise the 'newton-cg' solver is more efficient.
+        """
+        self.sqlCtx = sqlCtx
+        self.sc = sqlCtx._sc
+        self.uid = "lr"
+        if solver == 'newton-cg' or solver == 'direct-solve':
+            self.estimator = self.sc._jvm.org.apache.sysml.api.ml.LinearRegression(self.uid, self.sc._jsc.sc(), solver)
+        else:
+            raise Exception('Only the newton-cg and direct-solve solvers are supported')
+        self.estimator.setMaxIter(max_iter)
+        if C <= 0:
+            raise Exception('C has to be positive')
+        reg = 1.0 / C
+        self.estimator.setRegParam(reg)
+        self.estimator.setTol(tol)
+        self.estimator.setIcpt(int(fit_intercept))
+        self.transferUsingDF = transferUsingDF
+        self.setOutputRawPredictionsToFalse = False
+
+
+class SVM(BaseSystemMLClassifier):
+
+    def __init__(self, sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001, C=1.0, is_multi_class=False, transferUsingDF=False):
+        """
+        Performs both binary-class and multi-class SVM (Support Vector Machines).
+
+        Parameters
+        ----------
+        sqlCtx: PySpark SQLContext
+        fit_intercept: Specifies whether to add intercept or not (default: True)
+        max_iter: Maximum number of iterations (default: 100)
+        tol: Tolerance used in the convergence criterion (default: 0.000001)
+        C: 1/regularization parameter (default: 1.0)
+        is_multi_class: Specifies whether to use the binary-class or multi-class SVM algorithm (default: False)
+        """
+        self.sqlCtx = sqlCtx
+        self.sc = sqlCtx._sc
+        self.uid = "svm"
+        self.estimator = self.sc._jvm.org.apache.sysml.api.ml.SVM(self.uid, self.sc._jsc.sc(), is_multi_class)
+        self.estimator.setMaxIter(max_iter)
+        if C <= 0:
+            raise Exception('C has to be positive')
+        reg = 1.0 / C
+        self.estimator.setRegParam(reg)
+        self.estimator.setTol(tol)
+        self.estimator.setIcpt(int(fit_intercept))
+        self.transferUsingDF = transferUsingDF
+        self.setOutputRawPredictionsToFalse = False
+
+class NaiveBayes(BaseSystemMLClassifier):
+
+    def __init__(self, sqlCtx, laplace=1.0, transferUsingDF=False):
+        """
+        Performs Naive Bayes classification.
+
+        Parameters
+        ----------
+        sqlCtx: PySpark SQLContext
+        laplace: Laplace smoothing specified by the user to avoid creation of 0 probabilities (default: 1.0)
+        """
+        self.sqlCtx = sqlCtx
+        self.sc = sqlCtx._sc
+        self.uid = "nb"
+        self.estimator = self.sc._jvm.org.apache.sysml.api.ml.NaiveBayes(self.uid, self.sc._jsc.sc())
+        self.estimator.setLaplace(laplace)
+        self.transferUsingDF = transferUsingDF
+        self.setOutputRawPredictionsToFalse = False
+
+__all__ = ['LogisticRegression', 'LinearRegression', 'SVM', 'NaiveBayes']
diff --git a/src/main/python/setup.py b/src/main/python/setup.py
new file mode 100644
index 00000000000..0bcebabf076
--- /dev/null
+++ b/src/main/python/setup.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+from setuptools import setup, find_packages
+import os
+import time
+
+VERSION = '0.11.0.dev1'
+RELEASED_DATE = str(time.strftime("%m/%d/%Y"))
+numpy_version = '1.8.2'
+scipy_version = '0.15.1'
+REQUIRED_PACKAGES = [
+    'numpy >= %s' % numpy_version,
+    'scipy >= %s' % scipy_version
+]
+
+PACKAGE_DATA = []
+for path, subdirs, files in os.walk('SystemML/SystemML-java'):
+    for name in files:
+        PACKAGE_DATA = PACKAGE_DATA + [ os.path.join(path, name).replace('./', '') ]
+
+setup(
+    name='SystemML',
+    version=VERSION,
+    description='Apache SystemML is a distributed and declarative machine learning platform.',
+    long_description='''
+
+    Apache SystemML is an effort undergoing incubation at the Apache Software Foundation (ASF), sponsored by the Apache Incubator PMC.
+    While incubation status is not necessarily a reflection of the completeness
+    or stability of the code, it does indicate that the project has yet to be
+    fully endorsed by the ASF.
+
+    Apache SystemML provides declarative large-scale machine learning (ML) that aims at
+    flexible specification of ML algorithms and automatic generation of hybrid runtime
+    plans ranging from single-node, in-memory computations, to distributed computations on Apache Hadoop and Apache Spark.
+
+    Note: This is not a released version and was built with the SNAPSHOT available on the date ''' + RELEASED_DATE,
+    url='http://systemml.apache.org/',
+    author='Apache SystemML',
+    author_email='dev@systemml.incubator.apache.org',
+    packages=find_packages(),
+    install_requires=REQUIRED_PACKAGES,
+    include_package_data=True,
+    package_data={
+        'SystemML-java': PACKAGE_DATA
+    },
+    classifiers=[
+        'Intended Audience :: Developers',
+        'Intended Audience :: Education',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python :: 2.7',
+        'Topic :: Scientific/Engineering :: Mathematics',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+        'Topic :: Software Development :: Libraries',
+    ],
+    license='Apache 2.0',
+    )
\ No newline at end of file
diff --git a/src/main/python/SystemMLtests.py b/src/main/python/tests/test_mlcontext.py
similarity index 99%
rename from src/main/python/SystemMLtests.py
rename to src/main/python/tests/test_mlcontext.py
index e11a694677a..ec5a1964a6e 100644
--- a/src/main/python/SystemMLtests.py
+++ b/src/main/python/tests/test_mlcontext.py
@@ -101,4 +101,4 @@ def test_pydml(self):
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysml/api/python/test.py b/src/main/python/tests/test_mllearn.py
similarity index 92%
rename from src/main/java/org/apache/sysml/api/python/test.py
rename to src/main/python/tests/test_mllearn.py
index 21a1f79fd5c..22f798f7c70 100644
--- a/src/main/java/org/apache/sysml/api/python/test.py
+++ b/src/main/python/tests/test_mllearn.py
@@ -20,7 +20,7 @@
 #
 #-------------------------------------------------------------
 from sklearn import datasets, neighbors
-import SystemML as sml
+from SystemML.mllearn import LogisticRegression, LinearRegression, SVM, NaiveBayes
 from pyspark.sql import SQLContext
 from pyspark.context import SparkContext
 import unittest
@@ -47,7 +47,7 @@ def testLogisticSK1(self):
         y_train = y_digits[:.9 * n_samples]
         X_test = X_digits[.9 * n_samples:]
         y_test = y_digits[.9 * n_samples:]
-        logistic = sml.mllearn.LogisticRegression(sqlCtx)
+        logistic = LogisticRegression(sqlCtx)
         score = logistic.fit(X_train, y_train).score(X_test, y_test)
         self.failUnless(score > 0.9)
 
@@ -61,7 +61,7 @@ def testLogisticSK2(self):
         X_test = X_digits[.9 * n_samples:]
         y_test = y_digits[.9 * n_samples:]
         # Convert to DataFrame for i/o: current way to transfer data
-        logistic = sml.mllearn.LogisticRegression(sqlCtx, transferUsingDF=True)
+        logistic = LogisticRegression(sqlCtx, transferUsingDF=True)
         score = logistic.fit(X_train, y_train).score(X_test, y_test)
         self.failUnless(score > 0.9)
 
@@ -82,7 +82,7 @@ def testLogisticMLPipeline1(self):
         ], ["id", "text", "label"])
         tokenizer = Tokenizer(inputCol="text", outputCol="words")
         hashingTF = HashingTF(inputCol="words", outputCol="features", numFeatures=20)
-        lr = sml.mllearn.LogisticRegression(sqlCtx)
+        lr = LogisticRegression(sqlCtx)
         pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
         model = pipeline.fit(training)
         test = sqlCtx.createDataFrame([
@@ -103,7 +103,7 @@ def testLinearRegressionSK1(self):
         diabetes_X_test = diabetes_X[-20:]
         diabetes_y_train = diabetes.target[:-20]
         diabetes_y_test = diabetes.target[-20:]
-        regr = sml.mllearn.LinearRegression(sqlCtx)
+        regr = LinearRegression(sqlCtx)
         regr.fit(diabetes_X_train, diabetes_y_train)
         score = regr.score(diabetes_X_test, diabetes_y_test)
         self.failUnless(score > 0.4) # TODO: Improve r2-score (may be I am using it incorrectly)
@@ -115,7 +115,7 @@ def testLinearRegressionSK2(self):
         diabetes_X_test = diabetes_X[-20:]
         diabetes_y_train = diabetes.target[:-20]
         diabetes_y_test = diabetes.target[-20:]
-        regr = sml.mllearn.LinearRegression(sqlCtx, transferUsingDF=True)
+        regr = LinearRegression(sqlCtx, transferUsingDF=True)
         regr.fit(diabetes_X_train, diabetes_y_train)
         score = regr.score(diabetes_X_test, diabetes_y_test)
         self.failUnless(score > 0.4) # TODO: Improve r2-score (may be I am using it incorrectly)
@@ -129,7 +129,7 @@ def testSVMSK1(self):
         y_train = y_digits[:.9 * n_samples]
         X_test = X_digits[.9 * n_samples:]
         y_test = y_digits[.9 * n_samples:]
-        svm = sml.mllearn.SVM(sqlCtx, is_multi_class=True)
+        svm = SVM(sqlCtx, is_multi_class=True)
         score = svm.fit(X_train, y_train).score(X_test, y_test)
         self.failUnless(score > 0.9)
 
@@ -142,7 +142,7 @@ def testSVMSK2(self):
         y_train = y_digits[:.9 * n_samples]
         X_test = X_digits[.9 * n_samples:]
         y_test = y_digits[.9 * n_samples:]
-        svm = sml.mllearn.SVM(sqlCtx, is_multi_class=True, transferUsingDF=True)
+        svm = SVM(sqlCtx, is_multi_class=True, transferUsingDF=True)
         score = svm.fit(X_train, y_train).score(X_test, y_test)
         self.failUnless(score > 0.9)
 
@@ -155,7 +155,7 @@ def testNaiveBayesSK1(self):
         y_train = y_digits[:.9 * n_samples]
         X_test = X_digits[.9 * n_samples:]
         y_test = y_digits[.9 * n_samples:]
-        nb = sml.mllearn.NaiveBayes(sqlCtx)
+        nb = NaiveBayes(sqlCtx)
         score = nb.fit(X_train, y_train).score(X_test, y_test)
         self.failUnless(score > 0.85)
 
@@ -167,7 +167,7 @@ def testNaiveBayesSK2(self):
         # Both vectors and vectors_test are SciPy CSR matrix
         vectors = vectorizer.fit_transform(newsgroups_train.data)
         vectors_test = vectorizer.transform(newsgroups_test.data)
-        nb = sml.mllearn.NaiveBayes(sqlCtx)
+        nb = NaiveBayes(sqlCtx)
         nb.fit(vectors, newsgroups_train.target)
         pred = nb.predict(vectors_test)
         score = metrics.f1_score(newsgroups_test.target, pred, average='weighted')
@@ -175,4 +175,4 @@ def testNaiveBayesSK2(self):
 
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()
\ No newline at end of file
diff --git a/src/main/python/uploadToPyPI.sh b/src/main/python/uploadToPyPI.sh
new file mode 100644
index 00000000000..c892f3df358
--- /dev/null
+++ b/src/main/python/uploadToPyPI.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+cd ../../..
+mvn clean package -P distribution
+tar -xzf target/systemml-*-SNAPSHOT.tar.gz -C src/main/python/SystemML
+
+cd src/main/python/SystemML
+mv systemml-*-incubating-SNAPSHOT SystemML-java
+
+cd ..
+echo "Preparing to upload to PyPI ...."
+python setup.py register sdist upload
+
+rm -r SystemML/SystemML-java
\ No newline at end of file
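
A minimal end-to-end usage sketch of the mllearn estimators added above. It is illustrative only: it assumes an already-running SparkContext `sc` (as in test_mllearn.py), the SystemML package installed via the setup.py introduced here, and a small synthetic dataset in place of the scikit-learn loaders used by the tests.

# Minimal sketch (assumptions: running SparkContext `sc`, SystemML installed, synthetic data)
import numpy as np
from pyspark.sql import SQLContext
from SystemML.mllearn import LinearRegression

sqlCtx = SQLContext(sc)

# Synthetic regression data: y = 2*x0 + 3*x1 + 1 + noise
np.random.seed(0)
X = np.random.rand(200, 2)
y = 2.0 * X[:, 0] + 3.0 * X[:, 1] + 1.0 + 0.01 * np.random.randn(200)

X_train, y_train = X[:160], y[:160]
X_test, y_test = X[160:], y[160:]

# NumPy input is converted to MatrixBlock (transferUsingDF=False by default)
regr = LinearRegression(sqlCtx, fit_intercept=True, max_iter=100, C=1.0)
regr.fit(X_train, y_train)
y_pred = regr.predict(X_test)                      # returns a NumPy array for NumPy input
print('R2 score: %f' % regr.score(X_test, y_test)) # scikit-learn r2_score under the hood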