diff --git a/docs/source/qml.rst b/docs/source/qml.rst
index 5a89437c7..cf23cb9bb 100644
--- a/docs/source/qml.rst
+++ b/docs/source/qml.rst
@@ -113,3 +113,14 @@ qml\.aglaia module
    :inherited-members:

+qml\.qmlearn.representations module
+-----------------------------------
+
+.. automodule:: qml.qmlearn.representations
+    :inherited-members:
+
+qml\.qmlearn.kernels module
+---------------------------
+
+.. automodule:: qml.qmlearn.kernels
+    :inherited-members:
diff --git a/examples/qmlearn.py b/examples/qmlearn.py
new file mode 100644
index 000000000..84af9e7be
--- /dev/null
+++ b/examples/qmlearn.py
@@ -0,0 +1,287 @@
+import glob
+import numpy as np
+from qml import qmlearn
+import sklearn.pipeline
+import sklearn.model_selection
+
+def data():
+    """
+    Using the Data object.
+    """
+    print("*** Begin data examples ***")
+
+    # The Data object has the same role as the Compound class.
+    # Where the Compound class is for one compound, the Data class
+    # is for multiple compounds
+
+    # One can load in a set of xyz files
+    filenames = sorted(glob.glob("../test/qm7/00*.xyz"))
+    data = qmlearn.Data(filenames)
+    print("length of filenames", len(filenames))
+    print("length of nuclear_charges", len(data.nuclear_charges))
+    print("length of coordinates", len(data.coordinates))
+
+    # Or just pass a glob string
+    data = qmlearn.Data("../test/qm7/00*.xyz")
+    print("length of nuclear_charges", len(data.nuclear_charges))
+
+    # Energies (or other molecular properties) can be stored in the object
+    energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)[:98]
+    data.set_energies(energies)
+    print("length of energies", len(data.energies))
+
+    print("*** End data examples ***")
+    print()
+
+def preprocessing():
+    """
+    Rescaling energies
+    """
+
+    print("*** Begin preprocessing examples ***")
+
+    # The AtomScaler object does a linear fit of the number of each element to the energy.
+    data = qmlearn.Data("../test/qm7/*.xyz")
+    energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
+
+    # Input can be nuclear_charges and energies
+    print("Energies before rescaling", energies[:3])
+    rescaled_energies = qmlearn.preprocessing.AtomScaler().fit_transform(data.nuclear_charges, energies)
+    print("Energies after rescaling", rescaled_energies[:3])
+
+    # Or a data object can be used
+    data.set_energies(energies)
+    data2 = qmlearn.preprocessing.AtomScaler().fit_transform(data)
+    print("Energies after rescaling", data2.energies[:3])
+
+    print("*** End preprocessing examples ***")
+    print()
+
+def representations():
+    """
+    Creating representations. Currently implemented representations are
+    CoulombMatrix, AtomicCoulombMatrix, AtomicSLATM, GlobalSLATM,
+    FCHLRepresentations, AtomCenteredSymmetryFunctions.
+    (BagOfBonds is still missing)
+    """
+
+    print("*** Begin representations examples ***")
+
+    data = qmlearn.Data("../test/qm7/*.xyz")
+
+    # Representations can be created from a data object
+    model = qmlearn.representations.CoulombMatrix(sorting ='row-norm')
+    representations = model.generate(data)
+    print("Shape of representations:", representations.shape)
+
+    # Alternatively the data object can be passed at initialization of the representation class
+    # and only selected molecule indices can be passed
+
+    model = qmlearn.representations.CoulombMatrix(data)
+    representations = model.generate([0,5,7,16])
+    print("Shape of representations:", representations.shape)
+
+    print("*** End representations examples ***")
+    print()
+
+def kernels():
+    """
+    Create kernels. Currently implemented kernels are GaussianKernel,
+    LaplacianKernel, FCHLKernel.
+ """ + + print("*** Begin kernels examples ***") + + data = qmlearn.Data("../test/qm7/*.xyz") + energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1) + data.set_energies(energies) + + # Kernels can be created from representations + model = qmlearn.representations.CoulombMatrix(data) + indices = np.arange(100) + representations = model.generate(indices) + + model = qmlearn.kernels.GaussianKernel(sigma='auto') + symmetric_kernels = model.generate(representations[:80]) + print("Shape of symmetric kernels:", symmetric_kernels.shape) + + asymmetric_kernels = model.generate(representations[:80], representations[80:]) + print("Shape of asymmetric kernels:", asymmetric_kernels.shape) + + # Atomic representations can be used as well + model = qmlearn.representations.AtomicCoulombMatrix(data) + indices = np.arange(100) + representations = model.generate(indices) + + model = qmlearn.kernels.GaussianKernel(sigma='auto') + symmetric_kernels = model.generate(representations[:80], representation_type = 'atomic') + print("Shape of symmetric kernels:", symmetric_kernels.shape) + + asymmetric_kernels = model.generate(representations[:80], representations[80:], representation_type = 'atomic') + print("Shape of asymmetric kernels:", asymmetric_kernels.shape) + + print("*** End kernels examples ***") + print() + +def models(): + """ + Regression models. Only KernelRidgeRegression implemented so far. + """ + + print("*** Begin models examples ***") + + filenames = sorted(glob.glob("../test/qm7/*.xyz")) + data = qmlearn.Data(filenames) + energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1) + model = qmlearn.representations.CoulombMatrix(data) + # Create 1000 random indices + indices = np.arange(1000) + np.random.shuffle(indices) + + representations = model.generate(indices) + model = qmlearn.kernels.GaussianKernel(sigma='auto') + symmetric_kernels = model.generate(representations[:800]) + asymmetric_kernels = model.generate(representations[:800], representations[800:]) + + # Model can be fit giving kernel matrix and energies + + model = qmlearn.models.KernelRidgeRegression() + model.fit(symmetric_kernels, energies[indices[:800]]) + print("Fitted KRR weights:", model.alpha[:3]) + + # Predictions can be had from an asymmetric kernel + predictions = model.predict(asymmetric_kernels) + print("Predicted energies:", predictions[:3]) + print("True energies:", energies[indices[:3]]) + + # Or the score (default negative mae) can be had directly + scores = model.score(asymmetric_kernels, energies[indices[800:]]) + print("Negative MAE:", scores) + + print("*** End models examples ***") + print() + +def pipelines(): + """ + Constructing scikit-learn pipelines + """ + + print("*** Begin pipelines examples ***") + + # It is much easier to do all this with a scikit-learn pipeline + + # Create data + data = qmlearn.Data("../test/qm7/*.xyz") + energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1) + data.set_energies(energies) + + # Create model + model = sklearn.pipeline.make_pipeline( + qmlearn.preprocessing.AtomScaler(data), + qmlearn.representations.CoulombMatrix(), + qmlearn.kernels.GaussianKernel(), + qmlearn.models.KernelRidgeRegression(), + ) + + # Create 1000 random indices + indices = np.arange(1000) + np.random.shuffle(indices) + + model.fit(indices[:800]) + scores = model.score(indices[800:]) + print("Negative MAE:", scores) + + # Passing alchemy=False to kernels makes sure that the atomic kernel only compares C to C, H to H etc. 
+ # This will speed up kernels of some representations dramatically, but only works in pipelines + + # Create model + model = sklearn.pipeline.make_pipeline( + qmlearn.preprocessing.AtomScaler(data), + qmlearn.representations.CoulombMatrix(), + qmlearn.kernels.GaussianKernel(alchemy=False), + qmlearn.models.KernelRidgeRegression(), + ) + + # Create 1000 random indices + indices = np.arange(1000) + np.random.shuffle(indices) + + model.fit(indices[:800]) + scores = model.score(indices[800:]) + print("Negative MAE without alchemy:", scores) + + print("*** End pipelines examples ***") + print() + +def cross_validation(): + """ + Doing cross validation with qmlearn + """ + + print("*** Begin CV examples ***") + + # Create data + data = qmlearn.Data("../test/qm7/*.xyz") + energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1) + data.set_energies(energies) + + # Create model + model = sklearn.pipeline.make_pipeline( + qmlearn.preprocessing.AtomScaler(data), + qmlearn.representations.CoulombMatrix(), + qmlearn.kernels.GaussianKernel(), + qmlearn.models.KernelRidgeRegression(), + # memory='/dev/shm/' ### This will cache the previous steps to the virtual memory and might speed up gridsearch + ) + + # Create 1000 random indices + indices = np.arange(1000) + np.random.shuffle(indices) + + # 3-fold CV of a given model can easily be done + scores = sklearn.model_selection.cross_validate(model, indices, cv=3) + print("Cross-validated scores:", scores['test_score']) + + # Doing a grid search over hyper parameters + params = {'gaussiankernel__sigma': [10, 30, 100], + 'kernelridgeregression__l2_reg': [1e-8, 1e-4], + } + + grid = sklearn.model_selection.GridSearchCV(model, cv=3, refit=False, param_grid=params) + grid.fit(indices) + print("Best hyper parameters:", grid.best_params_) + print("Best score:", grid.best_score_) + + # As an alternative the pipeline can be constructed slightly different, which allows more complex CV + # Create model + model = sklearn.pipeline.Pipeline([ + ('preprocess', qmlearn.preprocessing.AtomScaler(data)), + ('representations', qmlearn.representations.CoulombMatrix()), + ('kernel', qmlearn.kernels.GaussianKernel()), + ('model', qmlearn.models.KernelRidgeRegression()) + ], + # memory='/dev/shm/' ### This will cache the previous steps to the virtual memory and might speed up gridsearch + ) + + # Doing a grid search over hyper parameters + # including which kernel to use + params = {'kernel': [qmlearn.kernels.LaplacianKernel(), qmlearn.kernels.GaussianKernel()], + 'kernel__sigma': [10, 30, 100, 1000, 3000, 1000], + 'model__l2_reg': [1e-8, 1e-4], + } + + grid = sklearn.model_selection.GridSearchCV(model, cv=3, refit=False, param_grid=params) + grid.fit(indices) + print("Best hyper parameters:", grid.best_params_) + print("Best score:", grid.best_score_) + + print("*** End CV examples ***") + +if __name__ == '__main__': + data() + preprocessing() + representations() + kernels() + models() + pipelines() + cross_validation() diff --git a/qml/__init__.py b/qml/__init__.py index 1d98cf687..0c4d0e346 100644 --- a/qml/__init__.py +++ b/qml/__init__.py @@ -40,6 +40,8 @@ from . import arad from . import fchl from . import representations +from . import qmlearn +from . import utils __author__ = "Anders S. 
Christensen" __copyright__ = "Copyright 2016" diff --git a/qml/aglaia/aglaia.py b/qml/aglaia/aglaia.py index c174ce286..8a7808cfc 100644 --- a/qml/aglaia/aglaia.py +++ b/qml/aglaia/aglaia.py @@ -30,13 +30,13 @@ import tensorflow as tf from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error from sklearn.base import BaseEstimator -from qml.aglaia.symm_funct import generate_parkhill_acsf -from qml.aglaia.utils import InputError, ceil, is_positive_or_zero, is_positive_integer, is_positive, \ - is_bool, is_positive_integer_or_zero, is_string, is_positive_integer_array, is_array_like, is_none, \ +from .symm_funct import generate_parkhill_acsf +from ..utils import InputError, ceil, is_positive_or_zero, is_positive_integer, is_positive, \ + is_bool, is_positive_integer_or_zero, is_string, is_positive_integer_array, is_array_like, \ check_global_representation, check_y, check_sizes, check_dy, check_classes, is_numeric_array, is_non_zero_integer, \ is_positive_integer_or_zero_array, check_local_representation -from qml.aglaia.tf_utils import TensorBoardLogger +from .tf_utils import TensorBoardLogger try: from qml.data import Compound @@ -580,7 +580,7 @@ def _set_slatm_parameters(self, params): self.slatm_parameters = {'slatm_sigma1': 0.05, 'slatm_sigma2': 0.05, 'slatm_dgrid1': 0.03, 'slatm_dgrid2': 0.03, 'slatm_rcut': 4.8, 'slatm_rpower': 6, 'slatm_alchemy': False} - if not is_none(params): + if params is not None: for key, value in params.items(): if key in self.slatm_parameters: self.slatm_parameters[key] = value @@ -597,7 +597,7 @@ def _set_acsf_parameters(self, params): self.acsf_parameters = {'radial_cutoff': 10.0, 'angular_cutoff': 10.0, 'radial_rs': (0.0, 0.1, 0.2), 'angular_rs': (0.0, 0.1, 0.2), 'theta_s': (3.0, 2.0), 'zeta': 3.0, 'eta': 2.0} - if not is_none(params): + if params is not None: for key, value in params.items(): if key in self.acsf_parameters: self.acsf_parameters[key] = value @@ -658,7 +658,7 @@ def generate_compounds(self, filenames): """ # Check that the number of properties match the number of compounds if the properties have already been set - if is_none(self.properties): + if self.properties is None: pass else: if self.properties.size == len(filenames): @@ -683,18 +683,18 @@ def generate_representation(self, xyz=None, classes=None): :return: None """ - if is_none(self.compounds) and is_none(xyz) and is_none(classes): + if self.compounds is None and xyz is None and classes is None: raise InputError("QML compounds need to be created in advance or Cartesian coordinates need to be passed in " "order to generate the representation.") - if not is_none(self.representation): + if self.representation is not None: raise InputError("The representations have already been set!") - if is_none(self.compounds): + if self.compounds is None: self.representation, self.classes = self._generate_representations_from_data(xyz, classes) - elif is_none(xyz): + elif xyz is None: # Make representations from compounds self.representation, self.classes = self._generate_representations_from_compounds() @@ -708,7 +708,7 @@ def set_properties(self, properties): :param y: array of properties of size (nsamples,) :type y: array """ - if is_none(properties): + if properties is None: raise InputError("Properties cannot be set to none.") else: if is_numeric_array(properties) and np.asarray(properties).ndim == 1: @@ -725,10 +725,10 @@ def set_representations(self, representations): :type representations: numpy array of shape (n_samples, n_features) or (n_samples, n_atoms, n_features) """ 
- if not is_none(self.representation): + if self.representation is not None: raise InputError("The representations have already been set!") - if is_none(representations): + if representations is None: raise InputError("Descriptor cannot be set to none.") else: if is_numeric_array(representations): @@ -745,7 +745,7 @@ def set_gradients(self, gradients): :return: None """ - if is_none(gradients): + if gradients is None: raise InputError("Gradients cannot be set to none.") else: if is_numeric_array(gradients): @@ -762,7 +762,7 @@ def set_classes(self, classes): :type classes: numpy array of shape (n_samples, n_atoms) of ints :return: None """ - if is_none(classes): + if classes is None: raise InputError("Classes cannot be set to none.") else: if is_positive_integer_array(classes): @@ -1050,7 +1050,7 @@ def _initialise_representation(self, representation, parameters): raise InputError("Unknown representation %s" % representation) self.representation_name = representation.lower() - if not is_none(parameters): + if parameters is not None: if not type(parameters) is dict: raise InputError("The representation parameters passed should be either None or a dictionary.") @@ -1060,7 +1060,7 @@ def _initialise_representation(self, representation, parameters): else: - if not is_none(parameters): + if parameters is not None: raise InputError("The representation %s does not take any additional parameters." % (self.representation_name)) def _set_representation(self, representation): @@ -1098,7 +1098,7 @@ def _generate_representations_from_compounds(self): :rtype: numpy array of shape (n_samples, n_features) and None """ - if is_none(self.compounds): + if self.compounds is None: raise InputError("This should never happen.") n_samples = len(self.compounds) @@ -1368,18 +1368,18 @@ def _check_inputs(self, x, y, dy, classes): if not is_array_like(x): raise InputError("x should be an array either containing indices or data.") - if not is_none(dy) and not is_none(classes): + if dy is not None and classes is not None: raise InputError("MRMP estimator cannot predict gradients and do atomic decomposition.") # Check if x is made up of indices or data if is_positive_integer_or_zero_array(x): - if is_none(self.representation): - if is_none(self.compounds): + if self.representation is None: + if self.compounds is None: raise InputError("No representations or QML compounds have been set yet.") else: self.representation, _ = self._generate_representations_from_compounds() - if is_none(self.properties): + if self.properties is None: raise InputError("The properties need to be set in advance.") approved_x = self.representation[x] @@ -1391,7 +1391,7 @@ def _check_inputs(self, x, y, dy, classes): else: - if is_none(y): + if y is None: raise InputError("y cannot be of None type.") approved_x = check_global_representation(x) @@ -1420,18 +1420,18 @@ def _check_predict_input(self, x, classes): if not is_array_like(x): raise InputError("x should be an array either containing indices or data.") - if not is_none(classes): + if classes is not None: raise InputError("MRMP estimator cannot do atomic decomposition.") # Check if x is made up of indices or data if is_positive_integer_or_zero_array(x): - if is_none(self.representation): - if is_none(self.compounds): + if self.representation is None: + if self.compounds is None: raise InputError("No representations or QML compounds have been set yet.") else: self.representation, _ = self._generate_representations_from_compounds() - if is_none(self.properties): + if self.properties is None: 
raise InputError("The properties need to be set in advance.") approved_x = self.representation[x] @@ -1586,7 +1586,7 @@ def _initialise_representation(self, representation, parameters): raise InputError("Unknown representation %s" % representation) self.representation_name = representation.lower() - if not is_none(parameters): + if parameters is not None: if not type(parameters) is dict: raise InputError("The representation parameters passed should be either None or a dictionary.") self._check_representation_parameters(parameters) @@ -1601,7 +1601,7 @@ def _initialise_representation(self, representation, parameters): else: - if not is_none(parameters): + if parameters is not None: raise InputError("The representation %s does not take any additional parameters." % (self.representation_name)) def _set_representation(self, representation): @@ -1624,7 +1624,7 @@ def _generate_representations_from_data(self, xyz, classes): :rtype: numpy arrays of shape (n_samples, n_atoms, n_features) and (n_samples, n_atoms) """ - if is_none(classes): + if classes is None: raise InputError("The classes need to be provided for the ARMP estimator.") else: if len(classes.shape) > 2 or np.all(xyz.shape[:2] != classes.shape): @@ -1743,7 +1743,7 @@ def _generate_representations_from_compounds(self): :rtype: numpy array of shape (n_samples, n_atoms, n_features) and (n_samples, n_atoms) """ - if is_none(self.compounds): + if self.compounds is None: raise InputError("QML compounds needs to be created in advance") if self.representation_name == 'slatm': @@ -2028,22 +2028,22 @@ def _check_inputs(self, x, y, dy, classes): if not is_array_like(x): raise InputError("x should be an array either containing indices or data.") - if not is_none(dy): + if dy is not None: raise InputError("ARMP estimator cannot be used to predict gradients. 
Use ARMP_G estimator.") # Check if x is made up of indices or data if is_positive_integer_or_zero_array(x): - if is_none(self.representation): + if self.representation is None: - if is_none(self.compounds): + if self.compounds is None: raise InputError("No representations or QML compounds have been set yet.") else: self.representation, self.classes = self._generate_representations_from_compounds() - if is_none(self.properties): + if self.properties is None: raise InputError("The properties need to be set in advance.") - if is_none(self.classes): + if self.classes is None: raise InputError("The classes need to be set in advance.") approved_x = self.representation[x] @@ -2055,9 +2055,9 @@ def _check_inputs(self, x, y, dy, classes): else: - if is_none(y): + if y is None: raise InputError("y cannot be of None type.") - if is_none(classes): + if classes is None: raise InputError("ARMP estimator needs the classes to do atomic decomposition.") approved_x = check_local_representation(x) @@ -2089,12 +2089,12 @@ def _check_predict_input(self, x, classes): # Check if x is made up of indices or data if is_positive_integer_or_zero_array(x): - if is_none(self.representation): - if is_none(self.compounds): + if self.representation is None: + if self.compounds is None: raise InputError("No representations or QML compounds have been set yet.") else: self.representation, self.classes = self._generate_representations_from_compounds() - if is_none(self.properties): + if self.properties is None: raise InputError("The properties need to be set in advance.") approved_x = self.representation[x] @@ -2104,7 +2104,7 @@ def _check_predict_input(self, x, classes): else: - if is_none(classes): + if classes is None: raise InputError("ARMP estimator needs the classes to do atomic decomposition.") approved_x = check_local_representation(x) diff --git a/qml/arad/arad.py b/qml/arad/arad.py index 1d226de6c..df36f9520 100644 --- a/qml/arad/arad.py +++ b/qml/arad/arad.py @@ -33,7 +33,7 @@ from .farad_kernels import fget_atomic_kernels_arad from .farad_kernels import fget_atomic_symmetric_kernels_arad -from qml.data.alchemy import PTP +from qml.utils.alchemy import PTP def getAngle(sp,norms): epsilon = 10.* np.finfo(float).eps diff --git a/qml/data/__init__.py b/qml/data/__init__.py index 7b2ca3566..dcd15565d 100644 --- a/qml/data/__init__.py +++ b/qml/data/__init__.py @@ -23,4 +23,3 @@ from .xyzdataprovider import XYZDataProvider from .compound import Compound -from .alchemy import ELEMENT_NAME, NUCLEAR_CHARGE diff --git a/qml/data/compound.py b/qml/data/compound.py index bdd48c66f..2c95f5348 100644 --- a/qml/data/compound.py +++ b/qml/data/compound.py @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2016-2017 Anders Steen Christensen, Felix Faber, Lars Andersen Bratholm +# Copyright (c) 2016-2018 Anders Steen Christensen, Felix Faber, Lars Andersen Bratholm # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -25,7 +25,7 @@ import numpy as np import collections -from .alchemy import NUCLEAR_CHARGE +from ..utils import NUCLEAR_CHARGE from ..representations import generate_coulomb_matrix from ..representations import generate_atomic_coulomb_matrix diff --git a/qml/data/dataprovider.py b/qml/data/dataprovider.py index 72b8e0345..62418321f 100644 --- a/qml/data/dataprovider.py +++ b/qml/data/dataprovider.py @@ -44,4 +44,3 @@ def get_properties(self, idx=None): def read_database(self, db_filename): self.compounds = 
connect(db_filename) - diff --git a/qml/data/xyzdataprovider.py b/qml/data/xyzdataprovider.py index 2ee4adc2b..fd55e5b37 100644 --- a/qml/data/xyzdataprovider.py +++ b/qml/data/xyzdataprovider.py @@ -40,4 +40,3 @@ def add_structures(self, xyz_filenames): print(i, xyz_filename, self.properties[i]) compound = read(xyz_filename) self.compounds.write(compound) - diff --git a/qml/fchl/fchl_electric_field_kernels.py b/qml/fchl/fchl_electric_field_kernels.py index 6ba5ebc04..beb7c7fba 100644 --- a/qml/fchl/fchl_electric_field_kernels.py +++ b/qml/fchl/fchl_electric_field_kernels.py @@ -31,7 +31,7 @@ from .fchl_kernel_functions import get_kernel_parameters -from qml.data.alchemy import get_alchemy +from qml.utils.alchemy import get_alchemy # def get_local_kernels_ef(A, B, verbose=False, df=0.01, ef_scaling=0.01,\ diff --git a/qml/fchl/fchl_force_kernels.py b/qml/fchl/fchl_force_kernels.py index 51a64b0b8..9a5b4cc5f 100644 --- a/qml/fchl/fchl_force_kernels.py +++ b/qml/fchl/fchl_force_kernels.py @@ -37,7 +37,7 @@ from .fchl_kernel_functions import get_kernel_parameters -from qml.data.alchemy import get_alchemy +from qml.utils.alchemy import get_alchemy def get_gaussian_process_kernels(A, B, verbose=False, dx=0.005, \ diff --git a/qml/fchl/fchl_kernels.py b/qml/fchl/fchl_kernels.py index bd3bb2c2d..feb450ef0 100644 --- a/qml/fchl/fchl_kernels.py +++ b/qml/fchl/fchl_kernels.py @@ -52,7 +52,7 @@ from .fchl_kernel_functions import get_kernel_parameters -from qml.data.alchemy import get_alchemy +from qml.utils.alchemy import get_alchemy def get_local_kernels(A, B, \ diff --git a/qml/fchl/fchl_representations.py b/qml/fchl/fchl_representations.py index 60f8123a7..14d6183da 100644 --- a/qml/fchl/fchl_representations.py +++ b/qml/fchl/fchl_representations.py @@ -25,8 +25,8 @@ import numpy as np import copy -from qml.data.alchemy import get_alchemy -from qml.data.alchemy import ELEMENT_NAME +from qml.utils.alchemy import get_alchemy +from qml.utils import ELEMENT_NAME def generate_representation(coordinates, nuclear_charges, max_size=23, neighbors=23, cut_distance = 5.0, cell=None): diff --git a/qml/fchl/fchl_scalar_kernels.py b/qml/fchl/fchl_scalar_kernels.py index 52f6733bc..2418d88c9 100644 --- a/qml/fchl/fchl_scalar_kernels.py +++ b/qml/fchl/fchl_scalar_kernels.py @@ -36,7 +36,7 @@ from .ffchl_module import fget_atomic_local_kernels_fchl from .fchl_kernel_functions import get_kernel_parameters -from qml.data.alchemy import get_alchemy +from qml.utils.alchemy import get_alchemy def get_local_kernels(A, B, verbose=False,\ diff --git a/qml/kernels/kernels.py b/qml/kernels/kernels.py index a13c3389b..81f53f9e0 100644 --- a/qml/kernels/kernels.py +++ b/qml/kernels/kernels.py @@ -24,7 +24,7 @@ import numpy as np -from .fkernels import fgaussian_kernel +from .fkernels import fgaussian_kernel, fgaussian_kernel_symmetric from .fkernels import flaplacian_kernel from .fkernels import fgaussian_kernel_symmetric from .fkernels import flaplacian_kernel_symmetric @@ -34,6 +34,7 @@ from .fkernels import fget_local_kernels_gaussian from .fkernels import fget_local_kernels_laplacian +from .fkernels import fget_vector_kernels_gaussian, fget_vector_kernels_gaussian_symmetric def laplacian_kernel(A, B, sigma): """ Calculates the Laplacian kernel matrix K, where :math:`K_{ij}`: @@ -304,7 +305,7 @@ def get_local_kernels_gaussian(A, B, na, nb, sigmas): nma = len(na) nmb = len(nb) - + sigmas = np.asarray(sigmas) nsigmas = len(sigmas) diff --git a/qml/models/kernelridge.py b/qml/models/kernelridge.py index 47fab8c79..32f61cb54 
100644 --- a/qml/models/kernelridge.py +++ b/qml/models/kernelridge.py @@ -110,5 +110,3 @@ def _save(self, path, save_kernel=False): if save_kernel: np.save(path + "/K.npy") - - diff --git a/qml/qmlearn/__init__.py b/qml/qmlearn/__init__.py new file mode 100644 index 000000000..d747a4c37 --- /dev/null +++ b/qml/qmlearn/__init__.py @@ -0,0 +1,27 @@ +# MIT License +# +# Copyright (c) 2018 Lars Andersen Bratholm +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from .data import Data +from . import representations +from . import kernels +from . import preprocessing +from . import models diff --git a/qml/qmlearn/data.py b/qml/qmlearn/data.py new file mode 100644 index 000000000..3890e2641 --- /dev/null +++ b/qml/qmlearn/data.py @@ -0,0 +1,139 @@ + +from __future__ import print_function + +import glob +import numpy as np +from ..utils import NUCLEAR_CHARGE +import copy + + +class Data(object): + """ + Temporary data class which should be replaced at some point by the ASE-interface. + This could in principle also be replaced by a dictionary + + """ + + def __init__(self, filenames=None, property_type = "energy"): + """ + :param filenames: list of filenames or a string to be read by glob. e.g. 
'dir/*.xyz' + :type filenames: list or string + :param property_type: What kind of property will be predicted ('energy') + :type property_type: string + """ + + self.property_type = property_type + + self._set_ncompounds(0) + self.coordinates = None + self.nuclear_charges = None + self.natoms = None + self.energies = None + + if isinstance(filenames, str): + filenames = sorted(glob.glob(filenames)) + if isinstance(filenames, list): + self._parse_xyz_files(filenames) + # Overwritten in various parts of a standard prediction pipeline + # so don't use these within the class + #self._has_transformed_labels + #self._representations + #self._kernel + #self._indices + #self._representation_type + #self._representation_short_name + #self._representation_cutoff + #self._representation_alchemy + + def _set_ncompounds(self, n): + self.ncompounds = n + # Hack for sklearn CV + self.shape = (n,) + + def take(self, i, axis=None): + """ + Hack for sklearn CV + """ + other = copy.copy(self) + other._indices = i + return other + + # Hack for sklearn CV + def __getitem__(self, i): + return i + + # Hack for sklearn CV but also convenience + def __len__(self): + if hasattr(self, '_indices'): + return len(self._indices) + return self.ncompounds + + # Hack for sklearn CV but also convenience + def __eq__(self, other): + """ + Overrides the == operator. + """ + + if type(self) != type(other): + return False + + self_vars = vars(self) + other_vars = vars(other) + + if len(self_vars) != len(other_vars): + return False + + for key, val in self_vars.items(): + if val is not other_vars[key]: + return False + + return True + + # Hack for sklearn CV but also convenience + def __ne__(self, other): + """ + Overrides the != operator (unnecessary in Python 3) + """ + return not self.__eq__(other) + + def set_energies(self, energies): + self.energies = energies + + def _parse_xyz_files(self, filenames): + """ + Parse a list of xyz files. 
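# A short usage sketch for the Data object defined above, including the small
# scikit-learn compatibility hacks (shape, take, __getitem__, __len__). Paths follow
# the examples in examples/qmlearn.py; this block is illustrative and not part of the module.

import numpy as np
from qml import qmlearn

data = qmlearn.Data("../test/qm7/00*.xyz")
print(len(data), data.shape)           # number of compounds and the (n,) shape sklearn inspects
print(data.natoms[:3])                 # per-molecule atom counts parsed from the xyz files

sub = data.take(np.array([0, 2, 5]))   # shallow copy that only records the selected indices
print(len(sub))                        # 3, since __len__ follows the stored _indices

print(data[7])                         # __getitem__ returns the index itself, so sklearn's
                                       # cross-validation machinery passes indices through unchanged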
+ """ + + self._set_ncompounds(len(filenames)) + self.coordinates = np.empty(self.ncompounds, dtype=object) + self.nuclear_charges = np.empty(self.ncompounds, dtype=object) + self.natoms = np.empty(self.ncompounds, dtype = int) + + for i, filename in enumerate(filenames): + with open(filename, "r") as f: + lines = f.readlines() + + natoms = int(lines[0]) + self.natoms[i] = natoms + self.nuclear_charges[i] = np.empty(natoms, dtype=int) + self.coordinates[i] = np.empty((natoms, 3), dtype=float) + + for j, line in enumerate(lines[2:natoms+2]): + tokens = line.split() + + if len(tokens) < 4: + break + + self.nuclear_charges[i][j] = NUCLEAR_CHARGE[tokens[0]] + self.coordinates[i][j] = np.asarray(tokens[1:4], dtype=float) + + # Try to convert dtype to int/float in cases where you have the + # same molecule, just different conformers + + try: + self.nuclear_charges = np.asarray([self.nuclear_charges[i] for i in range(self.ncompounds)], + dtype=int) + self.coordinates = np.asarray([self.coordinates[i] for i in range(self.ncompounds)], + dtype=float) + except ValueError: + pass diff --git a/qml/qmlearn/kernels.py b/qml/qmlearn/kernels.py new file mode 100644 index 000000000..a1157af2d --- /dev/null +++ b/qml/qmlearn/kernels.py @@ -0,0 +1,845 @@ +# MIT License +# +# Copyright (c) 2018 Lars Andersen Bratholm +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import print_function + +import numpy as np + +from sklearn.base import BaseEstimator + +from .data import Data + +from ..kernels.fkernels import fgaussian_kernel +from ..kernels.fkernels import fgaussian_kernel_symmetric +from ..kernels.fkernels import fget_vector_kernels_gaussian +from ..kernels.fkernels import fget_vector_kernels_gaussian_symmetric +from ..kernels.fkernels import flaplacian_kernel +from ..kernels.fkernels import flaplacian_kernel_symmetric +from ..kernels.fkernels import fget_vector_kernels_laplacian +from ..kernels.fkernels import fget_vector_kernels_laplacian_symmetric + +from ..fchl.ffchl_module import fget_kernels_fchl +from ..fchl.ffchl_module import fget_symmetric_kernels_fchl +from ..fchl.ffchl_module import fget_global_kernels_fchl +from ..fchl.ffchl_module import fget_global_symmetric_kernels_fchl + +from ..utils.alchemy import get_alchemy +from ..utils import get_unique + +class _BaseKernel(BaseEstimator): + """ + Base class for kernels + """ + + def fit(self, X): + """ + The fit routine is needed for scikit-learn pipelines. + This is actually never used but has to be here. 
+ """ + raise NotImplementedError + + def transform(self, X, y = None): + """ + The transform routine is needed for scikit-learn pipelines. + """ + raise NotImplementedError + + def generate(self, **params): + raise NotImplementedError + + def _check_data_object(self, X): + # Check that we have a data object + if not isinstance(X, Data): + print("Error: Expected Data object as input in %s" % self.__class__.__name__) + raise SystemExit + + if not hasattr(X, '_representation_short_name'): + print("Error: No representations found in Data object") + raise SystemExit + + if X._representation_short_name == 'fchl' and self.__class__.__name__ != 'FCHLKernel' or \ + X._representation_short_name != 'fchl' and self.__class__.__name__ == 'FCHLKernel': + print("Error: The FCHL representation is only compatible with the FCHL kernel") + raise SystemExit + + def _set_representations(self, rep): + self.representations = rep + + def _set_nuclear_charges(self, charge): + self.nuclear_charges = charge + + def transform(self, X): + + self._check_data_object(X) + + # Kernel between representation stored in fit and representation + # given in data object. The order matters + if X._representation_type is None: + # For FCHL to keep the documentation tidy, since self.local overrides + # X._representation_type + kernel = self.generate(X._representations, self.representations) + elif X._representation_type == 'molecular': + # Ignore constant features + constant_features = (np.std(X._representations, axis=0) == 0) * self._constant_features * \ + (X._representations[0] == self.representations[0]) + kernel = self.generate(X._representations[:,~constant_features], self.representations[:,~constant_features], 'molecular') + elif (self.alchemy == 'auto' and X._representation_alchemy) or self.alchemy: + # Ignore constant features + flat_representations = np.asarray([item for sublist in X._representations for item in sublist]) + constant_features = (np.std(flat_representations, axis=0) == 0) * self._constant_features * \ + (X._representations[0][0] == self.representations[0][0]) + X_representations = [np.asarray([rep[~constant_features] for rep in mol]) for mol in X._representations] + self_representations = [np.asarray([rep[~constant_features] for rep in mol]) for mol in self.representations] + kernel = self.generate(X_representations, self_representations, 'atomic') + else: + # Find elements used in representations from X + elements1 = get_unique(X.nuclear_charges[X._indices]) + # Get the elements common to both X and the fitted representations + # stored in self + elements = list(set(elements1).intersection(self.elements)) + ## Get list in the form [H_representations, C_representations, ...] 
+ # And ignore constant features + rep1, rep2 = self._get_elementwise_representations_transform(X._representations, X.nuclear_charges[X._indices], elements) + # Sum elementwise contributions to the kernel + kernel = np.zeros((len(X._representations), len(self.representations))) + for i in range(len(rep1)): + kernel += self.generate(rep1[i], rep2[i], representation_type='atomic') + + X._kernel = kernel + + return X + + def _get_elementwise_representations_transform(self, representations, nuclear_charges, elements): + """ + Create a set of lists where the each item only contain representations of a specific element + """ + + # Ignore constant features + flat_nuclear_charges1 = np.asarray([item for sublist in nuclear_charges for item in sublist], dtype=int) + flat_representations1 = np.asarray([item for sublist in representations for item in sublist]) + flat_nuclear_charges2 = np.asarray([item for sublist in self.nuclear_charges for item in sublist], dtype=int) + flat_representations2 = np.asarray([item for sublist in self.representations for item in sublist]) + + constant_features = {element: None for element in elements} + + for ele in elements: + rep1 = flat_representations1[flat_nuclear_charges1 == ele] + rep2 = flat_representations2[flat_nuclear_charges2 == ele] + rep = np.concatenate([rep1, rep2]) + constant_features[ele] = (np.std(rep, axis=0) == 0) + + # Create the representations + elementwise_representations1 = [[np.atleast_2d( + [v[~constant_features[element]] for j,v in enumerate(representations[i]) + if nuclear_charges[i][j] == element]) for i in range(len(nuclear_charges))] + for element in elements] + + elementwise_representations2 = [[np.atleast_2d( + [v[~constant_features[element]] for j,v in enumerate(self.representations[i]) + if self.nuclear_charges[i][j] == element]) for i in range(len(self.nuclear_charges))] + for element in elements] + + return elementwise_representations1, elementwise_representations2 + + def _get_elementwise_representations_fit(self, representations, nuclear_charges): + """ + Create a list where the each item only contain representations of a specific element + """ + elements = get_unique(nuclear_charges) + self.elements = elements + + # Ignore constant features + flat_nuclear_charges = np.asarray([item for sublist in nuclear_charges for item in sublist], dtype=int) + flat_representations = np.asarray([item for sublist in representations for item in sublist]) + + constant_features = {element: None for element in elements} + + for ele in elements: + rep = flat_representations[flat_nuclear_charges == ele] + constant_features[ele] = (np.std(rep, axis=0) == 0) + + # Create the representations + elementwise_representations = [[np.atleast_2d( + [v[~constant_features[element]] for j,v in enumerate(representations[i]) + if nuclear_charges[i][j] == element]) for i in range(len(nuclear_charges))] + for element in elements] + + return elementwise_representations + + def _fit_transform(self, X): + + self._check_data_object(X) + + # Store representation / nuclear_charges for future transform calls + self._set_representations(X._representations) + self._set_nuclear_charges(X.nuclear_charges[X._indices]) + + if X._representation_type is None: + # For FCHL to keep the documentation tidy, since self.local overrides + # X._representation_type + kernel = self.generate(X._representations) + elif X._representation_type == 'molecular': + # Ignore constant features + self._constant_features = (np.std(X._representations, axis=0) == 0) + kernel = 
self.generate(X._representations[:,~self._constant_features], representation_type='molecular') + elif (self.alchemy == 'auto' and X._representation_alchemy) or self.alchemy: + # Ignore constant features + flat_representations = np.asarray([item for sublist in X._representations for item in sublist]) + self._constant_features = (np.std(flat_representations, axis=0) == 0) + representations = [np.asarray([rep[~self._constant_features] for rep in mol]) for mol in X._representations] + kernel = self.generate(representations, representation_type='atomic') + else: + # Get list in the form [H_representations, C_representations, ...] + # And ignore constant features + rep = self._get_elementwise_representations_fit(X._representations, X.nuclear_charges[X._indices]) + # Sum elementwise contributions to the kernel + kernel = np.zeros((len(X._representations),)*2) + for r in rep: + kernel += self.generate(r, representation_type='atomic') + + # Store kernel + X._kernel = kernel + + return X + + def fit_transform(self, X, y=None): + return self._fit_transform(X) + +class GaussianKernel(_BaseKernel): + """ + Gaussian kernel + """ + + def __init__(self, sigma='auto', alchemy='auto'): + """ + :param sigma: Scale parameter of the gaussian kernel. `sigma='auto'` will try to guess + a good value (very approximate). + :type sigma: float + :param alchemy: Determines if contributions from atoms of differing elements should be + included in the kernel. If alchemy='auto', this will be automatically + determined from the representation used + :type alchemy: bool + """ + self.sigma = sigma + self.alchemy = alchemy + + self.representations = None + self._elements = None + self._constant_features = None + + def _quick_estimate_sigma(self, X, representation_type, sigma_init=100, count=1): + """ + Use 50 random points for atomic, 200 for molecular, to get an approximate guess for sigma + """ + + if count > 10: + print("Error. Could not automatically determine parameter `sigma` in the kernel %s" + % self.__class__.__name__) + raise SystemExit + + if representation_type == 'molecular': + n = 200 + else: + n = 50 + + if len(X) < n: + n = len(X) + + self.sigma = sigma_init + + # Generate kernel for a random subset + indices = np.random.choice(np.arange(len(X)), size=n, replace=False) + kernel = self.generate([X[i] for i in indices], representation_type=representation_type) + + if representation_type == 'molecular': + # min smallest kernel element + smallest_kernel_element = np.min(kernel) + + # Rescale sigma such that smallest kernel element will be equal to 1/2 + self.sigma *= np.sqrt(-np.log(smallest_kernel_element) / np.log(2)) + # Update sigma if the given sigma was completely wrong + if np.isinf(self.sigma): + self._quick_estimate_sigma(X, representation_type, sigma_init * 10, count + 1) + elif self.sigma <= 1e-12: + self._quick_estimate_sigma(X, representation_type, sigma_init / 10, count + 1) + else: + sizes = np.asarray([len(X[i]) for i in indices]) + kernel /= sizes[:,None] * sizes[None,:] + smallest_kernel_element = np.min(kernel) + + if smallest_kernel_element < 0.5: + self._quick_estimate_sigma(X, representation_type, sigma_init * 2.5, count + 1) + elif smallest_kernel_element > 0.95: + self._quick_estimate_sigma(X, representation_type, sigma_init / 2.5, count + 1) + + def generate(self, X, Y=None, representation_type='molecular'): + """ + Create a gaussian kernel from representations `X`. Optionally + an asymmetric kernel can be constructed between representations + `X` and `Y`. 
+ If `representation_type=='molecular` it is assumed that the representations + are molecular and of shape (n_samples, representation_size). + If `representation_type=='atomic` the representations are assumed to be atomic + and the kernel will be computed as an atomic decomposition. + + :param X: representations + :type X: array + :param Y: (Optional) representations + :type Y: array + + :return: Gaussian kernel matrix of shape (n_samplesX, n_samplesX) if \ + Y=None else (n_samplesX, n_samplesY) + :rtype: array + """ + + if self.sigma == 'auto': + if representation_type == 'molecular': + auto_sigma = True + else: + auto_sigma = False + + # Do a quick and dirty initial estimate of sigma + self._quick_estimate_sigma(X, representation_type) + else: + auto_sigma = False + + + if representation_type == 'molecular': + kernel = self._generate_molecular(X,Y) + elif representation_type == 'atomic': + kernel = self._generate_atomic(X,Y) + else: + # Should never get here for users + print("Error: representation_type needs to be 'molecular' or 'atomic'. Got %s" % representation_type) + raise SystemExit + + if auto_sigma: + # Find smallest kernel element + smallest_kernel_element = np.min(kernel) + largest_kernel_element = np.max(kernel) + # Rescale kernel such that we don't have to calculate it again for a new sigma + alpha = - np.log(2) / np.log(smallest_kernel_element/largest_kernel_element) + self.sigma /= np.sqrt(alpha) + return kernel ** alpha + + return kernel + + def _generate_molecular(self, X, Y=None): + + X = np.asarray(X) + + # Note: Transposed for Fortran + n = X.shape[0] + + if Y is None or X is Y: + # Do symmetric matrix + K = np.empty((n, n), order='F') + fgaussian_kernel_symmetric(X.T, n, K, self.sigma) + else: + Y = np.asarray(Y) + # Do asymmetric matrix + m = Y.shape[0] + K = np.empty((n, m), order='F') + fgaussian_kernel(X.T, n, Y.T, m, K, self.sigma) + + return K + + def _generate_atomic(self, X, Y=None): + + n1 = np.array([len(x) for x in X], dtype=np.int32) + + max1 = np.max(n1) + + nm1 = n1.size + + for i in range(len(X)): + rep_size = X[0].shape[1] + if rep_size == 0: + continue + break + else: + if Y is None: + return np.zeros((nm1, nm1)) + return np.zeros((nm1, len(Y))) + + x1 = np.zeros((nm1, max1, rep_size), dtype=np.float64, order="F") + + for i in range(nm1): + if X[i].shape[1] == 0: + n1[i] = 0 + continue + x1[i,:n1[i]] = X[i] + + + # Reorder for Fortran speed + x1 = np.swapaxes(x1, 0, 2) + + sigmas = np.array([self.sigma], dtype=np.float64) + nsigmas = sigmas.size + + if Y is None or X is Y: + # Do symmetric matrix + return fget_vector_kernels_gaussian_symmetric(x1, n1, [self.sigma], + nm1, nsigmas)[0] + else: + # Do asymmetric matrix + n2 = np.array([len(y) for y in Y], dtype=np.int32) + max2 = np.max(n2) + nm2 = n2.size + x2 = np.zeros((nm2, max2, rep_size), dtype=np.float64, order="F") + for i in range(nm2): + if Y[i].shape[1] == 0: + n2[i] = 0 + continue + x2[i,:n2[i]] = Y[i] + x2 = np.swapaxes(x2, 0, 2) + return fget_vector_kernels_gaussian(x1, x2, n1, n2, [self.sigma], + nm1, nm2, nsigmas)[0] + +class LaplacianKernel(_BaseKernel): + """ + Laplacian kernel + """ + + def __init__(self, sigma='auto', alchemy='auto'): + """ + :param sigma: Scale parameter of the gaussian kernel. `sigma='auto'` will try to guess + a good value (very approximate). + :type sigma: float or string + :param alchemy: Determines if contributions from atoms of differing elements should be + included in the kernel. 
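# The sigma='auto' logic in GaussianKernel above relies on the fact that raising a
# Gaussian kernel to a power is equivalent to shrinking sigma: if K = exp(-d^2 / (2*s^2)),
# then K**a = exp(-d^2 / (2*(s/sqrt(a))^2)). That is why generate() can rescale an already
# computed kernel with kernel**alpha while dividing sigma by sqrt(alpha). A quick numerical
# check of the identity (plain numpy, not qmlearn code):

import numpy as np

d = 3.7      # distance between two representations
s = 100.0    # initial sigma guess
a = 2.5      # an arbitrary rescaling exponent

k = np.exp(-d**2 / (2 * s**2))
print(np.isclose(k**a, np.exp(-d**2 / (2 * (s / np.sqrt(a))**2))))   # True

# For the Laplacian kernel, K = exp(-d/s), the same trick requires dividing sigma by
# alpha itself rather than sqrt(alpha), which matches the LaplacianKernel code below.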
+ :type alchemy: bool + """ + self.sigma = sigma + self.alchemy = alchemy + + self.representations = None + self._elements = None + self._constant_features = None + + def _quick_estimate_sigma(self, X, representation_type, sigma_init=100, count=1): + """ + Use 50 random points for atomic, 200 for molecular, to get an approximate guess for sigma + """ + + if count > 10: + print("Error. Could not automatically determine parameter `sigma` in the kernel %s" + % self.__class__.__name__) + raise SystemExit + + if representation_type == 'molecular': + n = 200 + else: + n = 50 + + if len(X) < n: + n = len(X) + + self.sigma = sigma_init + + # Generate kernel for a random subset + indices = np.random.choice(np.arange(len(X)), size=n, replace=False) + kernel = self.generate([X[i] for i in indices], representation_type=representation_type) + + if representation_type == 'molecular': + # min smallest kernel element + smallest_kernel_element = np.min(kernel) + + # Rescale sigma such that smallest kernel element will be equal to 1/2 + self.sigma *= - np.log(smallest_kernel_element) / np.log(2) + # Update sigma if the given sigma was completely wrong + if np.isinf(self.sigma): + self._quick_estimate_sigma(X, representation_type, sigma_init * 10, count + 1) + elif self.sigma <= 1e-12: + self._quick_estimate_sigma(X, representation_type, sigma_init / 10, count + 1) + else: + sizes = np.asarray([len(X[i]) for i in indices]) + kernel /= sizes[:,None] * sizes[None,:] + smallest_kernel_element = np.min(kernel) + + if smallest_kernel_element < 0.5: + self._quick_estimate_sigma(X, representation_type, sigma_init * 2.5, count + 1) + elif smallest_kernel_element > 1: + self._quick_estimate_sigma(X, representation_type, sigma_init / 2.5, count + 1) + + def generate(self, X, Y=None, representation_type='molecular'): + """ + Create a gaussian kernel from representations `X`. Optionally + an asymmetric kernel can be constructed between representations + `X` and `Y`. + If `representation_type=='molecular` it is assumed that the representations + are molecular and of shape (n_samples, representation_size). + If `representation_type=='atomic` the representations are assumed to be atomic + and the kernel will be computed as an atomic decomposition. + + :param X: representations + :type X: array + :param Y: (Optional) representations + :type Y: array + + :return: Gaussian kernel matrix of shape (n_samplesX, n_samplesX) if \ + Y=None else (n_samplesX, n_samplesY) + :rtype: array + """ + + if self.sigma == 'auto': + if representation_type == 'molecular': + auto_sigma = True + else: + auto_sigma = False + + # Do a quick and dirty initial estimate of sigma + self._quick_estimate_sigma(X, representation_type) + else: + auto_sigma = False + + + if representation_type == 'molecular': + kernel = self._generate_molecular(X,Y) + elif representation_type == 'atomic': + kernel = self._generate_atomic(X,Y) + else: + # Should never get here for users + print("Error: representation_type needs to be 'molecular' or 'atomic'. 
Got %s" % representation_type) + raise SystemExit + + if auto_sigma: + # Find smallest kernel element + smallest_kernel_element = np.min(kernel) + largest_kernel_element = np.max(kernel) + # Rescale kernel such that we don't have to calculate it again for a new sigma + alpha = - np.log(2) / np.log(smallest_kernel_element/largest_kernel_element) + self.sigma /= alpha + return kernel ** alpha + + return kernel + + def _generate_molecular(self, X, Y=None): + + X = np.asarray(X) + + # Note: Transposed for Fortran + n = X.shape[0] + + if Y is None or X is Y: + # Do symmetric matrix + K = np.empty((n, n), order='F') + flaplacian_kernel_symmetric(X.T, n, K, self.sigma) + else: + Y = np.asarray(Y) + # Do asymmetric matrix + m = Y.shape[0] + K = np.empty((n, m), order='F') + flaplacian_kernel(X.T, n, Y.T, m, K, self.sigma) + + return K + + def _generate_atomic(self, X, Y=None): + + n1 = np.array([len(x) for x in X], dtype=np.int32) + + max1 = np.max(n1) + + nm1 = n1.size + + for i in range(len(X)): + rep_size = X[0].shape[1] + if rep_size == 0: + continue + break + else: + if Y is None: + return np.zeros((nm1, nm1)) + return np.zeros((nm1, len(Y))) + + x1 = np.zeros((nm1, max1, rep_size), dtype=np.float64, order="F") + + for i in range(nm1): + if X[i].shape[1] == 0: + n1[i] = 0 + continue + x1[i,:n1[i]] = X[i] + + #for i,j in zip(x1[:,:,2], n1): + # print(i,j) + + + # Reorder for Fortran speed + x1 = np.swapaxes(x1, 0, 2) + + sigmas = np.array([self.sigma], dtype=np.float64) + nsigmas = sigmas.size + + if Y is None or X is Y: + # Do symmetric matrix + return fget_vector_kernels_laplacian_symmetric(x1, n1, [self.sigma], + nm1, nsigmas)[0] + else: + # Do asymmetric matrix + n2 = np.array([len(y) for y in Y], dtype=np.int32) + max2 = np.max(n2) + nm2 = n2.size + x2 = np.zeros((nm2, max2, rep_size), dtype=np.float64, order="F") + for i in range(nm2): + if Y[i].shape[1] == 0: + n2[i] = 0 + continue + x2[i,:n2[i]] = Y[i] + x2 = np.swapaxes(x2, 0, 2) + return fget_vector_kernels_laplacian(x1, x2, n1, n2, [self.sigma], + nm1, nm2, nsigmas)[0] + +class FCHLKernel(_BaseKernel): + """ + FCHL kernel + """ + + def __init__(self, sigma='auto', alchemy=True, two_body_scaling=np.sqrt(8), + three_body_scaling=1.6, two_body_width=0.2, three_body_width=np.pi, + two_body_power=4.0, three_body_power=2.0, damping_start=1.0, cutoff=5.0, + fourier_order=1, alchemy_period_width=1.6, alchemy_group_width=1.6, + local=True): + """ + :param sigma: Scale parameter of the gaussian basis functions. `sigma='auto'` will try to guess + a good value (very approximate). + :type sigma: float or string + :param alchemy: Determines if contributions from atoms of differing elements should be + included in the kernel. + :type alchemy: bool + :param two_body_scaling: Weight for 2-body terms. + :type two_body_scaling: float + :param three_body_scaling: Weight for 3-body terms. + :type three_body_scaling: float + :param two_body_width: Gaussian width for 2-body terms + :type two_body_width: float + :param three_body_width: Gaussian width for 3-body terms. + :type three_body_width: float + :param two_body_power: Powerlaw for :math:`r^{-n}` 2-body terms. + :type two_body_power: float + :param three_body_power: Powerlaw for Axilrod-Teller-Muto 3-body term + :type three_body_power: float + :param damping_start: The fraction of the cutoff radius at which cut-off damping start. + :type damping_start: float + :param cutoff: Cut-off radius. + :type cutoff: float + :param fourier_order: 3-body Fourier-expansion truncation order. 
+ :type fourier_order: integer + :param alchemy_period_width: Gaussian width along periods (columns) in the periodic table. + :type alchemy_period_width: float + :param alchemy_group_width: Gaussian width along groups (rows) in the periodic table. + :type alchemy_group_width: float + :param local: Do local decomposition + :param local: bool + """ + self.sigma = sigma + self.alchemy = alchemy + self.two_body_scaling = two_body_scaling + self.three_body_scaling = three_body_scaling + self.two_body_width = two_body_width + self.three_body_width = three_body_width + self.two_body_power = two_body_power + self.three_body_power = three_body_power + self.damping_start = damping_start + self.cutoff = cutoff + self.fourier_order = fourier_order + self.alchemy_period_width = alchemy_period_width + self.alchemy_group_width = alchemy_group_width + self.local = local + + self.representations = None + + def _quick_estimate_sigma(self, X, sigma_init=1, count=1): + """ + Use 50 random points for atomic, 200 for molecular, to get an approximate guess for sigma + """ + + if count > 10: + print("Error. Could not automatically determine parameter `sigma` in the kernel %s" + % self.__class__.__name__) + raise SystemExit + + n = 50 + + if len(X) < n: + n = len(X) + + self.sigma = sigma_init + + # Generate kernel for a random subset + indices = np.random.choice(np.arange(len(X)), size=n, replace=False) + kernel = self.generate(X)#[indices]) + + if not self.local: + # min smallest kernel element + smallest_kernel_element = np.min(kernel) + if smallest_kernel_element < 0.3: + self._quick_estimate_sigma(X, sigma_init * 2.5, count + 1) + elif smallest_kernel_element > 0.9: + self._quick_estimate_sigma(X, sigma_init / 2.5, count + 1) + + else: + sizes = np.asarray([len(X[i]) for i in indices]) + kernel /= sizes[:,None] * sizes[None,:] + smallest_kernel_element = np.min(kernel) + + if smallest_kernel_element < 0.1: + self._quick_estimate_sigma(X, sigma_init * 2.5, count + 1) + elif smallest_kernel_element > 0.3: + self._quick_estimate_sigma(X, sigma_init / 2.5, count + 1) + + def generate(self, X, Y=None): + """ + Create a kernel from representations `X`. Optionally + an asymmetric kernel can be constructed between representations + `X` and `Y`. 
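# A hedged sketch of how the FCHLKernel above could be used inside a pipeline, mirroring
# the pipeline examples in examples/qmlearn.py. FCHLRepresentations is only named in this
# diff, so it is constructed with defaults here; the cutoff passed to the kernel must not
# exceed the cutoff used for the representation (checked further down in fit_transform).

import numpy as np
import sklearn.pipeline
from qml import qmlearn

data = qmlearn.Data("../test/qm7/*.xyz")
energies = np.loadtxt("../test/data/hof_qm7.txt", usecols=1)
data.set_energies(energies)

model = sklearn.pipeline.make_pipeline(
    qmlearn.preprocessing.AtomScaler(data),
    qmlearn.representations.FCHLRepresentations(),        # constructor arguments not shown in this diff
    qmlearn.kernels.FCHLKernel(sigma='auto', cutoff=5.0),  # kernel cutoff <= representation cutoff
    qmlearn.models.KernelRidgeRegression(),
)

indices = np.arange(1000)
np.random.shuffle(indices)

model.fit(indices[:800])
print("Negative MAE:", model.score(indices[800:]))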
+ + :param X: representations + :type X: array + :param Y: (Optional) representations + :type Y: array + + :return: Gaussian kernel matrix of shape (n_samplesX, n_samplesX) if \ + Y=None else (n_samplesX, n_samplesY) + :rtype: array + """ + + if self.sigma == 'auto': + # Do a quick and dirty initial estimate of sigma + self._quick_estimate_sigma(X) + + + if not self.local: + kernel = self._generate_molecular(X,Y) + else: + kernel = self._generate_atomic(X,Y) + + return kernel + + def _generate_molecular(self, X, Y=None): + + atoms_max = X.shape[1] + neighbors_max = X.shape[3] + nm1 = X.shape[0] + N1 = np.zeros((nm1),dtype=np.int32) + + for a in range(nm1): + N1[a] = len(np.where(X[a,:,1,0] > 0.0001)[0]) + + neighbors1 = np.zeros((nm1, atoms_max), dtype=np.int32) + + for a, representation in enumerate(X): + ni = N1[a] + for i, x in enumerate(representation[:ni]): + neighbors1[a,i] = len(np.where(x[0] < self.cutoff)[0]) + + if self.alchemy: + alchemy = 'periodic-table' + else: + alchemy = 'off' + + doalchemy, pd = get_alchemy(alchemy, emax=100, r_width=self.alchemy_group_width, c_width=self.alchemy_period_width) + + if Y is None or X is Y: + # Do symmetric kernel + return fget_global_symmetric_kernels_fchl(X, N1, neighbors1, [self.sigma], + nm1, 1, self.three_body_width, self.two_body_width, self.damping_start, + self.cutoff, self.fourier_order, pd, self.two_body_scaling, self.three_body_scaling, + doalchemy, self.two_body_power, self.three_body_power)[0] + else: + # Do asymmetric kernel + nm2 = Y.shape[0] + N2 = np.zeros((nm2),dtype=np.int32) + + for a in range(nm2): + N2[a] = len(np.where(Y[a,:,1,0] > 0.0001)[0]) + + neighbors2 = np.zeros((nm2, atoms_max), dtype=np.int32) + + for a, representation in enumerate(Y): + ni = N2[a] + for i, x in enumerate(representation[:ni]): + neighbors2[a,i] = len(np.where(x[0] < self.cutoff)[0]) + + return fget_global_kernels_fchl(X, Y, N1, N2, neighbors1, neighbors2, [self.sigma], + nm1, nm2, 1, self.three_body_width, self.two_body_width, self.damping_start, + self.cutoff, self.fourier_order, pd, self.two_body_scaling, self.three_body_scaling, + doalchemy, self.two_body_power, self.three_body_power)[0] + + def _generate_atomic(self, X, Y=None): + + atoms_max = X.shape[1] + neighbors_max = X.shape[3] + + nm1 = X.shape[0] + N1 = np.zeros((nm1),dtype=np.int32) + + for a in range(nm1): + N1[a] = len(np.where(X[a,:,1,0] > 0.0001)[0]) + + neighbors1 = np.zeros((nm1, atoms_max), dtype=np.int32) + + for a, representation in enumerate(X): + ni = N1[a] + for i, x in enumerate(representation[:ni]): + neighbors1[a,i] = len(np.where(x[0] < self.cutoff)[0]) + + if self.alchemy: + alchemy = 'periodic-table' + else: + alchemy = 'off' + + doalchemy, pd = get_alchemy(alchemy, emax=100, r_width=self.alchemy_group_width, c_width=self.alchemy_period_width) + + if Y is None or X is Y: + return fget_symmetric_kernels_fchl(X, N1, neighbors1, [self.sigma], + nm1, 1, self.three_body_width, self.two_body_width, self.damping_start, + self.cutoff, self.fourier_order, pd, self.two_body_scaling, self.three_body_scaling, + doalchemy, self.two_body_power, self.three_body_power)[0] + else: + nm2 = Y.shape[0] + N2 = np.zeros((nm2),dtype=np.int32) + + for a in range(nm2): + N2[a] = len(np.where(Y[a,:,1,0] > 0.0001)[0]) + + neighbors2 = np.zeros((nm2, atoms_max), dtype=np.int32) + + for a, representation in enumerate(Y): + ni = N2[a] + for i, x in enumerate(representation[:ni]): + neighbors2[a,i] = len(np.where(x[0] < self.cutoff)[0]) + + return fget_kernels_fchl(X, Y, N1, N2, neighbors1, 
neighbors2, [self.sigma],
+                nm1, nm2, 1, self.three_body_width, self.two_body_width, self.damping_start,
+                self.cutoff, self.fourier_order, pd, self.two_body_scaling, self.three_body_scaling,
+                doalchemy, self.two_body_power, self.three_body_power)[0]
+
+    def fit_transform(self, X, y=None):
+
+        # Check that the cutoff is not larger than the cutoff used to make the representation
+        if self.cutoff > X._representation_cutoff:
+            print("Error: Cutoff used in the FCHL kernel (%.2f) must be lower or equal" % self.cutoff,
+                  "to the cutoff used in the FCHL representation (%.2f)" % X._representation_cutoff)
+            raise SystemExit
+
+        return self._fit_transform(X)
+
diff --git a/qml/qmlearn/models.py b/qml/qmlearn/models.py
new file mode 100644
index 000000000..f53c02d1a
--- /dev/null
+++ b/qml/qmlearn/models.py
@@ -0,0 +1,161 @@
+# MIT License
+#
+# Copyright (c) 2018 Lars A. Bratholm
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from __future__ import division, absolute_import, print_function
+
+import numpy as np
+from sklearn.base import BaseEstimator
+from sklearn.metrics import mean_absolute_error, mean_squared_error
+
+from ..utils import is_numeric_array
+from .data import Data
+from ..math import cho_solve
+
+class _BaseModel(BaseEstimator):
+    """
+    Base class for all regression models
+    """
+
+    _estimator_type = "regressor"
+
+    def fit(self, X):
+        raise NotImplementedError
+
+    def predict(self, X):
+        raise NotImplementedError
+
+    def score(self, X, y=None):
+        """
+        Make predictions on `X` and return a score
+
+        :param X: Data object
+        :type X: object
+        :param y: Energies
+        :type y: array
+        :return: score
+        :rtype: float
+        """
+
+        # Make predictions
+        y_pred = self.predict(X)
+
+        # Get the true values
+        if is_numeric_array(y):
+            pass
+
+        elif isinstance(X, Data):
+            try:
+                y = X.energies[X._indices]
+            except:
+                print("No energies found in data object in module %s" % self.__class__.__name__)
+                raise SystemExit
+
+        else:
+            print("Expected variable 'X' to be Data object. Got %s" % str(X))
+            raise SystemExit
+
+        # Return the score
+        if self.scoring == 'mae':
+            return mean_absolute_error(y, y_pred)
+        elif self.scoring == 'neg_mae':
+            return - mean_absolute_error(y, y_pred)
+        elif self.scoring == 'rmsd':
+            return np.sqrt(mean_squared_error(y, y_pred))
+        elif self.scoring == 'neg_rmsd':
+            return - np.sqrt(mean_squared_error(y, y_pred))
+        elif self.scoring == 'neg_log_mae':
+            return - np.log(mean_absolute_error(y, y_pred))
+
+class KernelRidgeRegression(_BaseModel):
+    """
+    Standard Kernel Ridge Regression using a Cholesky solver
+    """
+
+    def __init__(self, l2_reg=1e-10, scoring='neg_mae'):
+        """
+        :param l2_reg: l2 regularization
+        :type l2_reg: float
+        :param scoring: Metric used for scoring ('mae', 'neg_mae', 'rmsd', 'neg_rmsd', 'neg_log_mae')
+        :type scoring: string
+        """
+        self.l2_reg = l2_reg
+        self.scoring = scoring
+
+        self.alpha = None
+
+    def fit(self, X, y=None):
+        """
+        Fit the Kernel Ridge Regression model using a Cholesky solver
+
+        :param X: Data object or kernel
+        :type X: object or array
+        :param y: Energies
+        :type y: array
+        """
+
+        if isinstance(X, Data):
+            try:
+                K, y = X._kernel, X.energies[X._indices]
+            except:
+                print("No kernel matrix and/or energies found in data object in module %s" % self.__class__.__name__)
+                raise SystemExit
+        elif is_numeric_array(X) and X.ndim == 2 and X.shape[0] == X.shape[1] and y is not None:
+            K = X
+        else:
+            print("Expected variable 'X' to be kernel matrix or Data object. Got %s" % str(X))
+            raise SystemExit
+
+        K[np.diag_indices_from(K)] += self.l2_reg
+
+        self.alpha = cho_solve(K, y)
+
+    def predict(self, X):
+        """
+        Predict properties from a kernel matrix or Data object,
+        using the weights obtained in a previous call to fit.
+
+        :param X: Data object or kernel
+        :type X: object or array
+        :return: Predicted properties
+        :rtype: array
+        """
+
+        # Check if model has been fit
+        if self.alpha is None:
+            print("Error: The %s model has not been trained yet" % self.__class__.__name__)
+            raise SystemExit
+
+        if isinstance(X, Data):
+            try:
+                K = X._kernel
+            except:
+                print("No kernel matrix found in data object in module %s" % self.__class__.__name__)
+                raise SystemExit
+        elif is_numeric_array(X) and X.ndim == 2 and X.shape[1] == self.alpha.size:
+            K = X
+        elif is_numeric_array(X) and X.ndim == 2 and X.shape[0] == self.alpha.size:
+            K = X.T
+        else:
+            print("Expected variable 'X' to be kernel matrix or Data object. Got %s" % str(X))
+            raise SystemExit
+
+        return np.dot(K, self.alpha)
diff --git a/qml/qmlearn/preprocessing.py b/qml/qmlearn/preprocessing.py
new file mode 100644
index 000000000..cab329075
--- /dev/null
+++ b/qml/qmlearn/preprocessing.py
@@ -0,0 +1,237 @@
+# MIT License
+#
+# Copyright (c) 2018 Lars Andersen Bratholm
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import print_function + +import copy + +import numpy as np +from sklearn.base import BaseEstimator +from sklearn.linear_model import LinearRegression + +from .data import Data +from ..utils import is_numeric_array, get_unique, is_positive_integer_or_zero_array + +class AtomScaler(BaseEstimator): + """ + Subtracts any constant offset or linear dependency on the number of atoms of each element from the property + """ + + def __init__(self, data=None, elements='auto'): + """ + :param data: Data object (optional) + :type data: Data object + :param elements: Elements to support. If `elements='auto'` try to determine this automatically. + :type elements: array + :param normalize: Normalize the transformed data such that the standard deviation is 1 + :type normalize: bool + """ + # Shallow copy should be fine + self._set_data(data) + self.elements = elements + + # Initialize model + self.model = LinearRegression() + + def _preprocess_input(self, X): + """ + Convenience function that processes X in a way such that + X can both be a data object, or an array of indices. And y + can be either values to transform or None. + + :param X: Data object, floating values or integer array of indices + :type X: Data object or array + :param y: Values or None + :type y: array or None + :return: Nuclear charges and values to transform + :rtype: tuple + """ + + if isinstance(X, Data): + + self._check_data(X) + + data = copy.copy(X) + + # Part of the sklearn CV hack. + if not hasattr(data, '_indices'): + data._indices = np.arange(len(data)) + + if hasattr(data, '_has_transformed_labels'): + print("Error: Target data has already been transformed by %s" % self.__class__.__name__) + raise SystemExit + + transformed_labels = np.zeros(len(data), dtype=bool) + transformed_labels[data._indices] = True + data._has_transformed_labels = transformed_labels + + elif self.data and is_positive_integer_or_zero_array(X) \ + and max(X) <= self.data.natoms.size: + # A copy here might avoid some unintended behaviour + # if multiple models is used sequentially. + data = copy.copy(self.data) + data._indices = np.asarray(X, dtype=int).ravel() + + + if hasattr(data, '_has_transformed_labels'): + if any(data._has_transformed_labels[data._indices] == True): + print("Error: Target data has already been transformed by %s" % self.__class__.__name__) + raise SystemExit + data._has_transformed_labels[data._indices] = True + else: + transformed_labels = np.zeros(len(data.energies), dtype=bool) + transformed_labels[data._indices] = True + data._has_transformed_labels = transformed_labels + + else: + print("Expected X to be array of indices or Data object. Got %s" % str(X)) + raise SystemExit + + return data + + def _check_data(self, X): + if X.natoms is None: + print("Error: Empty Data object passed to the %s transformer" % self.__class__.__name__) + raise SystemExit + + if X.energies is None: + print("Error: Expected Data object to have non-empty attribute 'energies'" % self.__class__.__name__) + raise SystemExit + + + def _set_data(self, data): + if data: + self._check_data(data) + self.data = data + + def fit_transform(self, X, y=None): + """ + Fit and transform the data with a linear model. + Supports three different types of input. 
+ 1) X is a list of nuclear charges and y is values to transform. + 2) X is an array of indices of which to transform. + 3) X is a data object + + :param X: List with nuclear charges or Data object. + :type X: list + :param y: Values to transform + :type y: array or None + :return: Array of transformed values or Data object, depending on input + :rtype: array or Data object + """ + + if not isinstance(X, Data) and y is not None: + data = None + nuclear_charges = X + else: + data = self._preprocess_input(X) + nuclear_charges = data.nuclear_charges[data._indices] + y = data.energies[data._indices] + + if self.elements == 'auto': + self.elements = get_unique(nuclear_charges) + else: + self._check_elements(nuclear_charges) + + + features = self._featurizer(nuclear_charges) + + delta_y = y - self.model.fit(features, y).predict(features) + + if data: + # Force copy + data.energies = data.energies.copy() + data.energies[data._indices] = delta_y + return data + else: + return delta_y + + def _check_elements(self, nuclear_charges): + """ + Check that the elements in the given nuclear_charges was + included in the fit. + """ + + elements_transform = get_unique(nuclear_charges) + if not np.isin(elements_transform, self.elements).all(): + print("Warning: Trying to transform molecules with elements", + "not included during fit in the %s method." % self.__class__.__name__, + "%s used in training but trying to transform %s" % (str(self.elements), str(elements_transform))) + + def _featurizer(self, X): + """ + Get the counts of each element as features. + """ + + n = len(X) + m = len(self.elements) + element_to_index = {v:i for i, v in enumerate(self.elements)} + features = np.zeros((n,m), dtype=int) + + for i, x in enumerate(X): + count_dict = {k:v for k,v in zip(*np.unique(x, return_counts=True))} + for key, value in count_dict.items(): + if key not in element_to_index: + continue + j = element_to_index[key] + features[i, j] = value + + return features + + def transform(self, X, y=None): + """ + Transform the data with the fitted linear model. + Supports three different types of input. + 1) X is a list of nuclear charges and y is values to transform. + 2) X is an array of indices of which to transform. + 3) X is a data object + + :param X: List with nuclear charges or Data object. 
+ :type X: list + :param y: Values to transform + :type y: array or None + :return: Array of transformed values or Data object, depending on input + :rtype: array or Data object + """ + + if not isinstance(X, Data) and y is not None: + data = None + nuclear_charges = X + else: + data = self._preprocess_input(X) + nuclear_charges = data.nuclear_charges[data._indices] + y = data.energies[data._indices] + + self._check_elements(nuclear_charges) + + features = self._featurizer(nuclear_charges) + + delta_y = y - self.model.predict(features) + + if data: + # Force copy + data.energies = data.energies.copy() + data.energies[data._indices] = delta_y + return data + else: + return delta_y + diff --git a/qml/qmlearn/representations.py b/qml/qmlearn/representations.py new file mode 100644 index 000000000..5cece8918 --- /dev/null +++ b/qml/qmlearn/representations.py @@ -0,0 +1,800 @@ +# MIT License +# +# Copyright (c) 2018 Lars Andersen Bratholm +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import print_function + +import copy +import itertools + +import numpy as np +from sklearn.base import BaseEstimator + +from .data import Data +from ..utils import is_positive_integer_or_zero_array, get_unique, get_pairs, is_string +from ..representations.frepresentations import fgenerate_coulomb_matrix +from ..representations.frepresentations import fgenerate_unsorted_coulomb_matrix +from ..representations.frepresentations import fgenerate_local_coulomb_matrix +from ..representations.frepresentations import fgenerate_atomic_coulomb_matrix +from ..representations.representations import get_slatm_mbtypes +from ..representations.representations import generate_slatm +from ..representations.facsf import fgenerate_acsf +from ..fchl.fchl_representations import generate_representation + +class _BaseRepresentation(BaseEstimator): + """ + Base class for representations + """ + + # Variables that has to be set in child methods + _representation_short_name = None + _representation_type = None + alchemy = None + + + def fit(self, X, y=None): + """ + The fit routine is needed for scikit-learn pipelines. + Must return self. + """ + raise NotImplementedError + + def transform(self, X): + """ + The transform routine is needed for scikit-learn pipelines. + Needs to store the representations in self.data.representations + and return self.data. 
+        """
+        raise NotImplementedError
+
+    def _extract_data(self, X):
+        """
+        Convenience function that processes X in a way such that
+        X can be either a Data object or an array of indices.
+
+        :param X: Data object or array of indices
+        :type X: Data object or array
+        :return: Data object
+        :rtype: Data object
+        """
+
+        if isinstance(X, Data):
+            self._check_data(X)
+            data = copy.copy(X)
+            if not hasattr(data, "_indices"):
+                data._indices = np.arange(len(data))
+
+        elif self.data and is_positive_integer_or_zero_array(X) \
+                and max(X) <= self.data.natoms.size:
+            # A copy here might avoid some unintended behaviour
+            # if multiple models are used sequentially.
+            data = copy.copy(self.data)
+            data._indices = np.asarray(X, dtype=int).ravel()
+        else:
+            print("Expected X to be array of indices or Data object. Got %s" % str(X))
+            raise SystemExit
+
+        # Store representation type / name for later use
+        data._representation_type = self._representation_type
+        data._representation_short_name = self._representation_short_name
+        data._representation_alchemy = self.alchemy
+
+        return data
+
+    def _check_data(self, X):
+        if X.natoms is None:
+            print("Error: Empty Data object passed to %s" % self.__class__.__name__)
+            raise SystemExit
+
+    def _set_data(self, data):
+        if data:
+            self._check_data(data)
+
+        self.data = data
+
+    def _check_elements(self, nuclear_charges):
+        """
+        Check that the elements in the given nuclear_charges were
+        included in the fit.
+        """
+
+        elements_transform = get_unique(nuclear_charges)
+        if not np.isin(elements_transform, self.elements).all():
+            print("Warning: Trying to transform molecules with elements",
+                  "not included during fit in the %s method." % self.__class__.__name__,
+                  "%s used in training but trying to transform %s" % (str(self.elements), str(elements_transform)))
+
+    # TODO Make it possible to pass data in other ways as well
+    # e.g. dictionary
+    def generate(self, X):
+        """
+        :param X: Data object or indices to use from the \
+            Data object passed at initialization
+        :type X: Data object or array of indices
+        :return: Representations of shape (n_samples, representation_size)
+        :rtype: array
+        """
+
+        return self.fit(X).transform(X)._representations
+
+class _MolecularRepresentation(_BaseRepresentation):
+    """
+    Base class for molecular representations
+    """
+
+    _representation_type = "molecular"
+
+class _AtomicRepresentation(_BaseRepresentation):
+    """
+    Base class for atomic representations
+    """
+
+    _representation_type = "atomic"
+    alchemy = None
+
+class CoulombMatrix(_MolecularRepresentation):
+    """
+    Coulomb Matrix representation as described in 10.1103/PhysRevLett.108.058301
+    """
+
+    _representation_short_name = "cm"
+
+    def __init__(self, data=None, size='auto', sorting="row-norm"):
+        """
+        Coulomb Matrix representation of a molecule.
+        Sorting of the elements can either be done by ``sorting="row-norm"`` or ``sorting="unsorted"``.
+        A matrix :math:`M` is constructed with elements
+
+        .. math::
+
+            M_{ij} =
+              \\begin{cases}
+                 \\tfrac{1}{2} Z_{i}^{2.4} & \\text{if } i = j \\\\
+                 \\frac{Z_{i}Z_{j}}{\\| {\\bf R}_{i} - {\\bf R}_{j}\\|} & \\text{if } i \\neq j
+              \\end{cases},
+
+        where :math:`i` and :math:`j` are atom indices, :math:`Z` is nuclear charge and
+        :math:`\\bf R` is the coordinate in Euclidean space.
+        If ``sorting = 'row-norm'``, the atom indices are reordered such that
+
+            :math:`\\sum_j M_{1j}^2 \\geq \\sum_j M_{2j}^2 \\geq ...
\\geq \\sum_j M_{nj}^2` + + The upper triangular of M, including the diagonal, is concatenated to a 1D + vector representation. + + If ``sorting = 'unsorted``, the elements are sorted in the same order as the input coordinates + and nuclear charges. + + The representation is calculated using an OpenMP parallel Fortran routine. + + :param size: The size of the largest molecule supported by the representation. + `size='auto'` will try to determine this automatically. + :type size: integer + :param sorting: How the atom indices are sorted ('row-norm', 'unsorted') + :type sorting: string + :param data: Optional Data object containing all molecules used in training \ + and/or prediction + :type data: Data object + """ + + self.size = size + self.sorting = sorting + self._set_data(data) + + def fit(self, X, y=None): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :param y: Dummy argument for scikit-learn + :type y: NoneType + :return: self + :rtype: object + """ + + data = self._extract_data(X) + + natoms = data.natoms#[data._indices] + + if self.size == 'auto': + self.size = max(natoms) + + if self.size < max(natoms): + print("Warning: Maximum size of system increased from %d to %d" + % (self.size, max(natoms))) + self.size = max(natoms) + + return self + + def transform(self, X): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :return: Data object + :rtype: Data object + """ + + data = self._extract_data(X) + + nuclear_charges = data.nuclear_charges[data._indices] + coordinates = data.coordinates[data._indices] + natoms = data.natoms[data._indices] + + if max(natoms) > self.size: + print("Error: Representation can't be generated for molecule of size \ + %d when variable 'size' is %d" % (max(natoms), self.size)) + raise SystemExit + + representations = [] + if (self.sorting == "row-norm"): + for charge, xyz in zip(nuclear_charges, coordinates): + representations.append( + fgenerate_coulomb_matrix(charge, xyz, self.size)) + + elif (self.sorting == "unsorted"): + for charge, xyz in zip(nuclear_charges, coordinates): + representations.append( + fgenerate_unsorted_coulomb_matrix(charge, xyz, self.size)) + else: + print("ERROR: Unknown sorting scheme requested") + raise SystemExit + + data._representations = np.asarray(representations) + + return data + + +# TODO add reference +# TODO add support for atomic properties +# The representation can be calculated for a subset by either specifying +# ``indices = [0,1,...]``, where :math:`[0,1,...]` are the requested atom indices, +# or by specifying ``indices = 'C'`` to only calculate central carbon atoms. +class AtomicCoulombMatrix(_AtomicRepresentation): + """ + Atomic Coulomb Matrix representation as described in + + """ + + _representation_short_name = "acm" + alchemy = True + + def __init__(self, data=None, size=23, sorting="distance", central_cutoff=10.0, + central_decay=-1, interaction_cutoff=10.0, interaction_decay=-1): + """ + Creates a Coulomb Matrix representation of the local environment of a central atom. + For each central atom :math:`k`, a matrix :math:`M` is constructed with elements + + .. 
math:: + + M_{ij}(k) = + \\begin{cases} + \\tfrac{1}{2} Z_{i}^{2.4} \\cdot f_{ik}^2 & \\text{if } i = j \\\\ + \\frac{Z_{i}Z_{j}}{\\| {\\bf R}_{i} - {\\bf R}_{j}\\|} \\cdot f_{ik}f_{jk}f_{ij} & \\text{if } i \\neq j + \\end{cases}, + + where :math:`i`, :math:`j` and :math:`k` are atom indices, :math:`Z` is nuclear charge and + :math:`\\bf R` is the coordinate in euclidean space. + + :math:`f_{ij}` is a function that masks long range effects: + + .. math:: + + f_{ij} = + \\begin{cases} + 1 & \\text{if } \\|{\\bf R}_{i} - {\\bf R}_{j} \\| \\leq r - \Delta r \\\\ + \\tfrac{1}{2} \\big(1 + \\cos\\big(\\pi \\tfrac{\\|{\\bf R}_{i} - {\\bf R}_{j} \\| + - r + \Delta r}{\Delta r} \\big)\\big) + & \\text{if } r - \Delta r < \\|{\\bf R}_{i} - {\\bf R}_{j} \\| \\leq r - \Delta r \\\\ + 0 & \\text{if } \\|{\\bf R}_{i} - {\\bf R}_{j} \\| > r + \\end{cases}, + + where the parameters ``central_cutoff`` and ``central_decay`` corresponds to the variables + :math:`r` and :math:`\Delta r` respectively for interactions involving the central atom, + and ``interaction_cutoff`` and ``interaction_decay`` corresponds to the variables + :math:`r` and :math:`\Delta r` respectively for interactions not involving the central atom. + + if ``sorting = 'row-norm'``, the atom indices are ordered such that + + :math:`\\sum_j M_{1j}(k)^2 \\geq \\sum_j M_{2j}(k)^2 \\geq ... \\geq \\sum_j M_{nj}(k)^2` + + if ``sorting = 'distance'``, the atom indices are ordered such that + + .. math:: + + \\|{\\bf R}_{1} - {\\bf R}_{k}\\| \\leq \\|{\\bf R}_{2} - {\\bf R}_{k}\\| + \\leq ... \\leq \\|{\\bf R}_{n} - {\\bf R}_{k}\\| + + The upper triangular of M, including the diagonal, is concatenated to a 1D + vector representation. + + The representation is calculated using an OpenMP parallel Fortran routine. + + :param data: Optional Data object containing all molecules used in training \ + and/or prediction + :type data: Data object + :param size: The maximum number of atoms within the cutoff radius supported by the representation. + `size='auto'` will try to determine this automatically. 
+ :type size: integer + :param sorting: How the atom indices are sorted ('row-norm', 'distance') + :type sorting: string + :param central_cutoff: The distance from the central atom, where the coulomb interaction + element will be zero + :type central_cutoff: float + :param central_decay: The distance over which the the coulomb interaction decays from full to none + :type central_decay: float + :param interaction_cutoff: The distance between two non-central atom, where the coulomb interaction + element will be zero + :type interaction_cutoff: float + :param interaction_decay: The distance over which the coulomb interaction decays from full to none + :type interaction_decay: float + """ + + self._set_data(data) + self.size = size + self.sorting = sorting + self.central_cutoff = central_cutoff + self.central_decay = central_decay + self.interaction_cutoff = interaction_cutoff + self.interaction_decay = interaction_decay + + def fit(self, X, y=None): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :param y: Dummy argument for scikit-learn + :type y: NoneType + :return: self + :rtype: object + """ + data = self._extract_data(X) + + natoms = data.natoms#[data._indices] + + if self.size == 'auto': + self.size = min(max(natoms), 2 * self.central_cutoff**3) + + return self + + def transform(self, X): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :return: Data object + :rtype: Data object + """ + + data = self._extract_data(X) + + nuclear_charges = data.nuclear_charges[data._indices] + coordinates = data.coordinates[data._indices] + natoms = data.natoms[data._indices] + + representations = [] + if (self.sorting == "row-norm"): + for charge, xyz, n in zip(nuclear_charges, coordinates, natoms): + representations.append(fgenerate_local_coulomb_matrix(np.arange(n)+1, n, charge, + xyz, n, self.size, self.central_cutoff, + self.central_decay, self.interaction_cutoff, self.interaction_decay)) + + elif (self.sorting == "distance"): + for charge, xyz, n in zip(nuclear_charges, coordinates, natoms): + representations.append(fgenerate_atomic_coulomb_matrix(np.arange(1,n+1), n, charge, + xyz, n, self.size, self.central_cutoff, + self.central_decay, self.interaction_cutoff, self.interaction_decay)) + + else: + print("ERROR: Unknown sorting scheme requested") + raise SystemExit + + data._representations = representations + + return data + + +class _SLATM(object): + """ + Routines for global and local SLATM + """ + + def _fit(self, X, y=None): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :param y: Dummy argument for scikit-learn + :type y: NoneType + :return: self + :rtype: object + """ + data = self._extract_data(X) + + if is_string(self.elements) and self.elements == 'auto' \ + and is_string(self.element_pairs) and self.element_pairs == 'auto': + nuclear_charges = data.nuclear_charges#[data._indices] + self.elements = get_unique(nuclear_charges) + self.element_pairs = get_slatm_mbtypes(nuclear_charges) + elif not is_string(self.elements) and is_string(self.element_pairs) \ + and self.element_pairs == 'auto': + self.element_pairs = get_slatm_mbtypes(self.elements * 3) + elif is_string(self.elements) and self.elements == 'auto' and \ + not is_string(self.element_pairs): + self.elements = get_unique(self.element_pairs) + + return 
self + + def _transform(self, X, local): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :param local: Use local or global version + :type local: bool + :return: Data object + :rtype: Data object + """ + + data = self._extract_data(X) + + nuclear_charges = data.nuclear_charges[data._indices] + coordinates = data.coordinates[data._indices] + natoms = data.natoms[data._indices] + + # Check that the molecules being transformed doesn't contain elements + # not used in the fit. + self._check_elements(nuclear_charges) + + representations = [] + for charge, xyz in zip(nuclear_charges, coordinates): + representations.append( + np.asarray( + generate_slatm(xyz, charge, self.element_pairs, local=local, + sigmas=[self.sigma2, self.sigma3], + dgrids=[self.dgrid2, self.dgrid3], rcut=self.rcut, + alchemy=self.alchemy, rpower=-self.rpower))) + + data._representations = np.asarray(representations) + + return data + +# TODO add reference +class GlobalSLATM(_SLATM, _MolecularRepresentation): + """ + Global version of SLATM. + """ + + _representation_short_name = "slatm" + + def __init__(self, data=None, sigma2=0.05, sigma3=0.05, dgrid2=0.03, + dgrid3=0.03, rcut=4.8, alchemy=False, rpower=-6, elements='auto', + element_pairs='auto'): + """ + Generate Spectrum of London and Axillrod-Teller-Muto potential (SLATM) representation. + + :param sigma2: Width of Gaussian smearing function for 2-body part. + :type sigma2: float + :param sigma3: Width of Gaussian smearing function for 3-body part. + :type sigma3: float + :param dgrid2: The interval between two sampled internuclear distances. + :type dgrid2: float + :param dgrid3: The interval between two sampled internuclear angles. + :type dgrid3: float + :param rcut: Cut-off radius. + :type rcut: float + :param alchemy: Use the alchemy version of SLATM. + :type alchemy: bool + :param rpower: The scaling power of R in 2-body potential. + :type rpower: float + :param elements: Atomnumber of elements that the representation should support. + `elements='auto'` will try to determine this automatically. + :type elements: list + :param element_pairs: Atomnumbers of element pairs that the representation should support. + `element_pairs='auto'` will try to determine this automatically. + :type element_pairs: list + :param data: Optional Data object containing all molecules used in training \ + and/or prediction + :type data: Data object + """ + + self._set_data(data) + self.sigma2 = sigma2 + self.sigma3 = sigma3 + self.dgrid2 = dgrid2 + self.dgrid3 = dgrid3 + self.rcut = rcut + self.alchemy = alchemy + self.rpower = rpower + # Will be changed during fit + self.elements = elements + self.element_pairs = element_pairs + + def fit(self, X, y=None): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :param y: Dummy argument for scikit-learn + :type y: NoneType + :return: self + :rtype: object + """ + + return self._fit(X, y) + + def transform(self, X): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :return: Data object + :rtype: Data object + """ + + return self._transform(X, False) + + +# TODO add reference +class AtomicSLATM(_SLATM, _AtomicRepresentation): + """ + Local version of SLATM. 
+ """ + + _representation_short_name = "aslatm" + + def __init__(self, data=None, sigma2=0.05, sigma3=0.05, dgrid2=0.03, + dgrid3=0.03, rcut=4.8, alchemy=False, rpower=-6, elements='auto', + element_pairs='auto'): + """ + Generate Spectrum of London and Axillrod-Teller-Muto potential (SLATM) representation. + + :param data: Optional Data object containing all molecules used in training \ + and/or prediction + :type data: Data object + :param sigma2: Width of Gaussian smearing function for 2-body part. + :type sigma2: float + :param sigma3: Width of Gaussian smearing function for 3-body part. + :type sigma3: float + :param dgrid2: The interval between two sampled internuclear distances. + :type dgrid2: float + :param dgrid3: The interval between two sampled internuclear angles. + :type dgrid3: float + :param rcut: Cut-off radius. + :type rcut: float + :param alchemy: Use the alchemy version of SLATM. + :type alchemy: bool + :param rpower: The scaling power of R in 2-body potential. + :type rpower: float + :param elements: Atomnumber of elements that the representation should support. + `elements='auto'` will try to determine this automatically. + :type elements: list + :param element_pairs: Atomnumbers of element pairs that the representation should support. + `element_pairs='auto'` will try to determine this automatically. + :type element_pairs: list + """ + + self._set_data(data) + self.sigma2 = sigma2 + self.sigma3 = sigma3 + self.dgrid2 = dgrid2 + self.dgrid3 = dgrid3 + self.rcut = rcut + self.alchemy = alchemy + self.rpower = rpower + # Will be changed during fit + self.elements = elements + self.element_pairs = element_pairs + + def fit(self, X, y=None): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :param y: Dummy argument for scikit-learn + :type y: NoneType + :return: self + :rtype: object + """ + + return self._fit(X, y) + + def transform(self, X): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :return: Data object + :rtype: Data object + """ + + return self._transform(X, True) + +class AtomCenteredSymmetryFunctions(_AtomicRepresentation): + """ + The variant of Atom-Centered Symmetry Functions used in 10.1039/C7SC04934J + """ + + _representation_short_name = "acsf" + alchemy = False + + def __init__(self, data=None, nbasis=3, precision=2, cutoff=5.0, elements='auto'): + """ + :param data: Optional Data object containing all molecules used in training \ + and/or prediction + :type data: Data object + :param nbasis: Number of basis functions to use + :type nbasis: integer + :param cutoff: Cutoff radius + :type cutoff: float + :param precision: Precision in the basis functions. A value of 2 corresponds to the \ + basis functions intersecting at half maximum. Higher values makes \ + the basis functions narrower. + :type precision: float + :param elements: Atomnumber of elements that the representation should support. + `elements='auto'` will try to determine this automatically. 
+ :type elements: list + """ + + self._set_data(data) + self.nbasis = nbasis + self.precision = precision + self.cutoff = cutoff + # Will be changed during fit + self.elements = elements + + def fit(self, X, y=None): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :param y: Dummy argument for scikit-learn + :type y: NoneType + :return: self + :rtype: object + """ + data = self._extract_data(X) + + if is_string(self.elements) and self.elements == 'auto': + nuclear_charges = data.nuclear_charges#[data._indices] + self.elements = get_unique(nuclear_charges) + + return self + + def transform(self, X): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :return: Data object + :rtype: Data object + """ + + data = self._extract_data(X) + + nuclear_charges = data.nuclear_charges[data._indices] + coordinates = data.coordinates[data._indices] + natoms = data.natoms[data._indices] + + # Check that the molecules being transformed doesn't contain elements + # not used in the fit. + self._check_elements(nuclear_charges) + + # Calculate parameters needed for fortran input. + # This is a heuristic that cuts down the number of hyper-parameters + # and might be subject to future change. + min_distance = 0.8 + Rs = np.linspace(min_distance, self.cutoff, self.nbasis) + eta = 4 * np.log(self.precision) * ((self.nbasis-1)/(self.cutoff-min_distance))**2 + Ts = np.linspace(0, np.pi, self.nbasis) + zeta = - np.log(self.precision) / np.log(np.cos(np.pi / (4 * (self.nbasis - 1)))**2) + n_elements = len(self.elements) + size = n_elements * self.nbasis + (n_elements * (n_elements + 1)) // 2 * self.nbasis ** 2 + + representations = [] + for charge, xyz, n in zip(nuclear_charges, coordinates, natoms): + representations.append( + fgenerate_acsf(xyz, charge, self.elements, Rs, Rs, Ts, + eta, eta, zeta, self.cutoff, self.cutoff, n, size)) + + data._representations = np.asarray(representations) + + return data + +class FCHLRepresentation(_BaseRepresentation): + """ + The representation from 10.1063/1.5020710 + """ + + _representation_short_name = "fchl" + + def __init__(self, data=None, size='auto', cutoff=10.0): + """ + :param data: Optional Data object containing all molecules used in training \ + and/or prediction + :type data: Data object + :param size: Max number of atoms in representation. `max_size='auto'` Will try to determine this + automatically. + :type size: integer + :param cutoff: Spatial cut-off distance - must be the same as used in the kernel function call. 
+ :type cutoff: float + """ + + self._set_data(data) + self.size = size + self.cutoff = cutoff + + def fit(self, X, y=None): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :param y: Dummy argument for scikit-learn + :type y: NoneType + :return: self + :rtype: object + """ + data = self._extract_data(X) + + natoms = data.natoms#[data._indices] + + if self.size == 'auto': + self.size = max(natoms) + + if self.size < max(natoms): + print("Warning: Maximum size of system increased from %d to %d" + % (self.size, max(natoms))) + self.size = max(natoms) + + return self + + def transform(self, X): + """ + :param X: Data object or indices to use from the \ + Data object passed at initialization + :type X: Data object or array of indices + :return: Data object + :rtype: Data object + """ + + data = self._extract_data(X) + + # Store cutoff to make sure that the kernel cutoff is less + data._representation_cutoff = self.cutoff + + nuclear_charges = data.nuclear_charges[data._indices] + coordinates = data.coordinates[data._indices] + natoms = data.natoms[data._indices] + + if max(natoms) > self.size: + print("Error: Representation can't be generated for molecule of size \ + %d when variable 'size' is %d" % (max(natoms), self.size)) + raise SystemExit + + representations = [] + for charge, xyz in zip(nuclear_charges, coordinates): + representations.append( + generate_representation(xyz, charge, max_size=self.size, cut_distance=self.cutoff)) + + data._representations = np.asarray(representations) + + return data diff --git a/qml/representations/representations.py b/qml/representations/representations.py index bd6416857..d3c83ee86 100644 --- a/qml/representations/representations.py +++ b/qml/representations/representations.py @@ -32,7 +32,7 @@ from .frepresentations import fgenerate_eigenvalue_coulomb_matrix from .frepresentations import fgenerate_bob -from qml.data.alchemy import NUCLEAR_CHARGE +from qml.utils import NUCLEAR_CHARGE from .slatm import get_boa from .slatm import get_sbop diff --git a/qml/utils/__init__.py b/qml/utils/__init__.py new file mode 100644 index 000000000..d710f72d9 --- /dev/null +++ b/qml/utils/__init__.py @@ -0,0 +1,25 @@ +# MIT License +# +# Copyright (c) 2017-2018 Silvia Amabilino, Lars Andersen Bratholm +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from . 
import alchemy +from .utils import * +from .alchemy import ELEMENT_NAME, NUCLEAR_CHARGE diff --git a/qml/data/alchemy.py b/qml/utils/alchemy.py similarity index 100% rename from qml/data/alchemy.py rename to qml/utils/alchemy.py diff --git a/qml/aglaia/utils.py b/qml/utils/utils.py similarity index 94% rename from qml/aglaia/utils.py rename to qml/utils/utils.py index 87beac4d7..05b96cbd5 100644 --- a/qml/aglaia/utils.py +++ b/qml/utils/utils.py @@ -41,9 +41,6 @@ def is_positive_integer_or_zero(x): def is_string(x): return isinstance(x, str) -def is_none(x): - return isinstance(x, type(None)) - def is_dict(x): return isinstance(x, dict) @@ -88,10 +85,28 @@ def _is_integer_array(x): def is_positive_integer_array(x): return (_is_integer_array(x) and _is_positive_array(x)) - def is_positive_integer_or_zero_array(x): return (_is_integer_array(x) and _is_positive_or_zero_array(x)) +def get_unique(x): + """ + Gets all unique elements in lists of lists + """ + elements = list(set(item for l in x for item in l)) + return elements + +def get_pairs(x): + """ + Get all unique pairs. E.g. x = [1,2,3] will return + [[1, 1], [1, 2], [1, 3], [2, 2], [2, 3], [3, 3]] + """ + pairs = [] + for i,v in enumerate(x): + for w in x[i:]: + pairs.append([v,w]) + return pairs + + # ------------- ** Checking inputs ** -------------------------- def check_global_representation(x): @@ -157,14 +172,14 @@ def check_sizes(x, y=None, dy=None, classes=None): :return: None """ - if is_none(dy) and is_none(classes): + if dy is None and classes is None: if x.shape[0] != y.shape[0]: raise InputError("The descriptor and the properties should have the same first number of elements in the " "first dimension. Got %s and %s" % (x.shape[0], y.shape[0])) - elif is_none(y) and is_none(dy): - if is_none(classes): + elif y is None and dy is None: + if classes is None: raise InputError("Only x is not none.") else: if x.shape[0] != classes.shape[0]: @@ -173,7 +188,7 @@ def check_sizes(x, y=None, dy=None, classes=None): if x.shape[1] != classes.shape[1]: raise InputError("The number of atoms in the descriptor and in the classes is different: %s and %s." % (x.shape[1], classes.shape[1])) - elif is_none(dy) and not is_none(classes): + elif dy is None and classes is not None: if x.shape[0] != y.shape[0] or x.shape[0] != classes.shape[0]: raise InputError("All x, y and classes should have the first number of elements in the first dimension. 
Got " @@ -202,7 +217,7 @@ def check_dy(dy): :return: numpy array of floats of shape (n_samples, n_atoms, 3) """ - if is_none(dy): + if dy is None: approved_dy = dy else: if not is_array_like(dy): @@ -227,7 +242,7 @@ def check_classes(classes): :return: numpy array of ints of shape (n_samples, n_atoms) """ - if is_none(classes): + if classes is None: approved_classes = classes else: if not is_array_like(classes): @@ -244,13 +259,6 @@ def check_classes(classes): return approved_classes - - - - - - - # #def _is_numeric_array(x): # try: diff --git a/setup.py b/setup.py index a1d4e1ff9..0f30de11f 100755 --- a/setup.py +++ b/setup.py @@ -146,7 +146,9 @@ def setup_qml(): 'qml.kernels', 'qml.math', 'qml.representations', - 'qml.models', + 'qml.qmlearn', + 'qml.utils', + 'qml.models' ], # metadata diff --git a/test/test_armp.py b/test/test_armp.py index b7bc3ddad..4c6fb10ab 100644 --- a/test/test_armp.py +++ b/test/test_armp.py @@ -27,7 +27,7 @@ import numpy as np from qml.aglaia.aglaia import ARMP -from qml.aglaia.utils import InputError +from qml.utils import InputError import glob import os diff --git a/test/test_mrmp.py b/test/test_mrmp.py index ffe32e585..1f98d7602 100644 --- a/test/test_mrmp.py +++ b/test/test_mrmp.py @@ -27,7 +27,7 @@ import numpy as np from qml.aglaia.aglaia import MRMP -from qml.aglaia.utils import InputError +from qml.utils import InputError import glob import os import shutil diff --git a/test/test_neural_network.py b/test/test_neural_network.py index 775335f12..21ec5318b 100644 --- a/test/test_neural_network.py +++ b/test/test_neural_network.py @@ -29,7 +29,7 @@ # TODO relative imports from qml.aglaia.aglaia import MRMP -from qml.aglaia.utils import InputError +from qml.utils import InputError # ------------ ** All functions to test the inputs to the classes ** ---------------
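The `sigma='auto'` heuristic in qml/qmlearn/kernels.py is a simple bracketing search: rescale sigma by a factor of 2.5 until the smallest element of a trial kernel falls inside a target window. Below is a minimal standalone sketch of the same idea, using a plain Gaussian kernel on random data instead of the FCHL routines; the 0.3-0.9 window mirrors the molecular branch and everything else is a toy assumption.

import numpy as np

def quick_sigma(X, sigma=1.0, lo=0.3, hi=0.9, factor=2.5, max_iter=10):
    """Rescale sigma until the smallest kernel element lies in [lo, hi]."""
    for _ in range(max_iter):
        # Trial kernel on a random subset of at most 50 points
        sub = X[np.random.choice(len(X), size=min(50, len(X)), replace=False)]
        d2 = np.sum((sub[:, None, :] - sub[None, :, :])**2, axis=-1)
        K = np.exp(-d2 / (2 * sigma**2))
        smallest = K.min()
        if smallest < lo:      # kernel too narrow -> widen it
            sigma *= factor
        elif smallest > hi:    # kernel too wide -> narrow it
            sigma /= factor
        else:
            break
    return sigma

print(quick_sigma(np.random.rand(200, 10)))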
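The fit_transform check in the FCHL kernel refuses to run with a cutoff larger than the one stored by FCHLRepresentation, so the two stages should share one value. Below is a rough sketch of wiring them up by hand; the kernel class name (here FCHLKernel) and its exact keyword arguments are assumptions inferred from the attributes set in its __init__, not verified against the module.

import numpy as np
from qml import qmlearn

data = qmlearn.Data("../test/qm7/*.xyz")

cutoff = 8.0   # one shared value; the kernel cutoff may be lower, never higher
rep = qmlearn.representations.FCHLRepresentation(data, cutoff=cutoff)
X = rep.generate(np.arange(50))

kernel = qmlearn.kernels.FCHLKernel(sigma='auto', cutoff=cutoff)   # assumed name/signature
K = kernel.generate(X)
print(K.shape)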
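KernelRidgeRegression in qml/qmlearn/models.py solves the standard closed-form problem alpha = (K + l2_reg * I)^-1 y with a Cholesky factorisation and predicts with the rectangular test/train kernel. The same algebra in plain NumPy/SciPy is sketched below; scipy's cho_factor/cho_solve stand in for qml.math.cho_solve, and the Gaussian kernel and random data are made up.

import numpy as np
from scipy.linalg import cho_factor, cho_solve

def gaussian_kernel(A, B, sigma=2.0):
    d2 = np.sum((A[:, None, :] - B[None, :, :])**2, axis=-1)
    return np.exp(-d2 / (2 * sigma**2))

X_train, X_test = np.random.rand(80, 5), np.random.rand(20, 5)
y_train = np.random.rand(80)

K = gaussian_kernel(X_train, X_train)
K[np.diag_indices_from(K)] += 1e-10            # l2 regularization on the diagonal
alpha = cho_solve(cho_factor(K), y_train)      # training weights

K_test = gaussian_kernel(X_test, X_train)      # shape (n_test, n_train)
y_pred = K_test.dot(alpha)
print(y_pred[:3])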
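AtomScaler in qml/qmlearn/preprocessing.py removes the part of the energy that is linear in the element counts, so the kernel model only has to learn the remainder. A rough scikit-learn-only equivalent of that featurization and baseline fit is sketched below; the toy molecules and energies are invented for illustration.

import numpy as np
from sklearn.linear_model import LinearRegression

nuclear_charges = [[6, 1, 1, 1, 1], [8, 1, 1], [6, 8, 1, 1], [6, 6, 1, 1, 1, 1]]
energies = np.array([-40.5, -76.4, -114.5, -78.6])

elements = sorted({z for mol in nuclear_charges for z in mol})
counts = np.array([[mol.count(z) for z in elements] for mol in nuclear_charges])

baseline = LinearRegression().fit(counts, energies)
delta = energies - baseline.predict(counts)   # what the kernel model is trained on
print(delta)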
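AtomCenteredSymmetryFunctions derives all Fortran inputs from just nbasis, precision and cutoff. The heuristic from its transform method can be evaluated on its own to see the resulting grids and representation size; the parameter values below are only illustrative.

import numpy as np

nbasis, precision, cutoff = 3, 2, 5.0
n_elements = 3          # e.g. H, C, O
min_distance = 0.8

Rs = np.linspace(min_distance, cutoff, nbasis)        # radial grid
eta = 4 * np.log(precision) * ((nbasis - 1) / (cutoff - min_distance))**2
Ts = np.linspace(0, np.pi, nbasis)                    # angular grid
zeta = -np.log(precision) / np.log(np.cos(np.pi / (4 * (nbasis - 1)))**2)
size = n_elements * nbasis + (n_elements * (n_elements + 1)) // 2 * nbasis**2

print(Rs, Ts)
print(eta, zeta, size)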
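The new helpers in qml/utils/utils.py are small but used throughout qmlearn, and their behaviour is easiest to see directly. This assumes the wildcard import in qml/utils/__init__.py exposes them as shown.

from qml.utils import get_unique, get_pairs

nuclear_charges = [[6, 1, 1, 1, 1], [8, 1, 1]]
print(get_unique(nuclear_charges))   # unique elements across all molecules, e.g. [1, 6, 8] (order may vary)
print(get_pairs([1, 6, 8]))          # [[1, 1], [1, 6], [1, 8], [6, 6], [6, 8], [8, 8]]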