From 9b22353b2c2a040fe901f370a9ee3cd59add3859 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Wed, 17 Aug 2022 12:40:49 +0100 Subject: [PATCH 01/19] add num_functions attribute to SumFunction --- Wrappers/Python/cil/optimisation/functions/Function.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Wrappers/Python/cil/optimisation/functions/Function.py b/Wrappers/Python/cil/optimisation/functions/Function.py index e2ee72dcb6..9ead567526 100644 --- a/Wrappers/Python/cil/optimisation/functions/Function.py +++ b/Wrappers/Python/cil/optimisation/functions/Function.py @@ -206,11 +206,12 @@ class SumFunction(Function): """ def __init__(self, *functions ): - - super(SumFunction, self).__init__() - if len(functions) < 2: + + self.num_functions = len(functions) + if self.num_functions < 2: raise ValueError('At least 2 functions need to be passed') self.functions = functions + super(SumFunction, self).__init__() @property def L(self): From 670e3c8502aabf3f66343721d601467c4b725006 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Wed, 17 Aug 2022 21:15:58 +0100 Subject: [PATCH 02/19] Adds SubsetSumFunction --- .../cil/optimisation/functions/Function.py | 86 +++++++++++++++ .../cil/optimisation/functions/__init__.py | 2 + .../Python/test/test_SubsetSumFunction.py | 102 ++++++++++++++++++ docs/source/optimisation.rst | 4 + 4 files changed, 194 insertions(+) create mode 100644 Wrappers/Python/test/test_SubsetSumFunction.py diff --git a/Wrappers/Python/cil/optimisation/functions/Function.py b/Wrappers/Python/cil/optimisation/functions/Function.py index 9ead567526..cb4730273e 100644 --- a/Wrappers/Python/cil/optimisation/functions/Function.py +++ b/Wrappers/Python/cil/optimisation/functions/Function.py @@ -301,6 +301,11 @@ def gradient(self, x, out=None): else: out += f.gradient(x) + def __getitem__(self, ind): + """ Return the function in the :code:`ind` index. + """ + return self.functions[ind] + def __add__(self, other): """ Addition for the SumFunction. @@ -321,7 +326,88 @@ def __add__(self, other): else: return super(SumFunction, self).__add__(other) +class SubsetSumFunction(SumFunction): + + r"""SubsetSumFunction represents the following sum + + .. math:: \frac{1}{n}\sum_{i=1}^{n} F_{i} = \frac{1}{n}(F_{1} + F_{2} + ... + F_{n}) + + where :math:`n` is the number of subsets. + + Parameters: + ----------- + functions : list(functions) + A list of functions: :code:`[F_{1}, F_{2}, ..., F_{n}]`. Each function is assumed to be smooth function with an implemented :func:`~Function.gradient` method. + sampling : :obj:`string`, Default = :code:`random` + Selection process for each function in the list. It can be :code:`random` or :code:`sequential`. + replacement : :obj:`boolean`. Default = :code:`True` + The same subset can be selected when :code:`replacement=True`. + + Example + ------- + + .. math:: \sum_{i=1}^{n} F_{i}(x) = \sum_{i=1}^{n}\|A_{i} x - b_{i}\|^{2} + + >>> f = SubsetSumFunction([LeastSquares(Ai, b=bi)]*n for Ai,bi in zip(A_subsets, b_subsets)) + + """ + + def __init__(self, functions, + sampling = "random", + replacement = True, + subset_init = -1): + + super(SubsetSumFunction, self).__init__(*functions) + + self.subset_num = subset_init + self.data_passes = [0] + self.sampling = sampling + self.replacement = replacement + self.enumerate_functions = list(range(self.num_subsets)) + + @property + def num_subsets(self): + return self.num_functions + def __call__(self, x): + r"""Returns the value of the averaged sum of functions at :math:`x`. + + .. 
math:: \frac{1}{n}(F_{1} + F_{2} + ... + F_{n}))(x) = \frac{1}{n} * ( F_{1}(x) + F_{2}(x) + ... + F_{n}(x)) + """ + + return (1/self.num_subsets) * super(SubsetSumFunction, self).__call__(x) + + def _full_gradient(self, x, out=None): + r""" Computes the full gradient at :code:`x`. It is the sum of all the gradients for each function. """ + return (1/self.num_subsets) * super(SubsetSumFunction, self).gradient(x, out=out) + + def gradient(self, x, out=None): + """ Computes the gradient for each subset function at :code:`x`.""" + raise NotImplemented + + def next_subset(self): + + """ Selects the next subset according to :code:`sampling`. + """ + + if self.sampling=="random" : + + if self.replacement is False: + self.subset_num = np.random.choice(self.enumerate_functions) + self.enumerate_functions.remove(self.subset_num) + if len(self.enumerate_functions)==0: + self.enumerate_functions = list(range(self.num_subsets)) + else: + self.subset_num = np.random.randint(0, self.num_subsets) + + elif self.sampling=="sequential": + if self.subset_num + 1 >= self.num_subsets: + self.subset_num = 0 + else: + self.subset_num += 1 + else: + raise NotImplementedError("Only {} and {} are implemented at the moment.".format("random","sequential")) + class ScaledFunction(Function): r""" ScaledFunction represents the scalar multiplication with a Function. diff --git a/Wrappers/Python/cil/optimisation/functions/__init__.py b/Wrappers/Python/cil/optimisation/functions/__init__.py index 0dcafcf24b..a5e93622b5 100644 --- a/Wrappers/Python/cil/optimisation/functions/__init__.py +++ b/Wrappers/Python/cil/optimisation/functions/__init__.py @@ -17,6 +17,7 @@ from .Function import Function from .Function import SumFunction +from .Function import SubsetSumFunction from .Function import ScaledFunction from .Function import SumScalarFunction from .Function import ConstantFunction @@ -33,3 +34,4 @@ from .KullbackLeibler import KullbackLeibler from .Rosenbrock import Rosenbrock from .TotalVariation import TotalVariation +from .SGDFunction import SGDFunction diff --git a/Wrappers/Python/test/test_SubsetSumFunction.py b/Wrappers/Python/test/test_SubsetSumFunction.py new file mode 100644 index 0000000000..6d2234497a --- /dev/null +++ b/Wrappers/Python/test/test_SubsetSumFunction.py @@ -0,0 +1,102 @@ +import unittest +from utils import initialise_tests +from cil.optimisation.operators import MatrixOperator +from cil.optimisation.functions import LeastSquares, SubsetSumFunction +from cil.framework import VectorData +import numpy as np + + +initialise_tests() + +class TestSubsetSumFunction(unittest.TestCase): + + def setUp(self): + + np.random.seed(10) + n = 50 + m = 500 + self.n_subsets = 10 + + Anp = np.random.uniform(0,1, (m, n)).astype('float32') + xnp = np.random.uniform(0,1, (n,)).astype('float32') + bnp = Anp.dot(xnp) + + Ai = np.vsplit(Anp, self.n_subsets) + bi = [bnp[i:i+n] for i in range(0, m, n)] + + self.Aop = MatrixOperator(Anp) + self.bop = VectorData(bnp) + ig = self.Aop.domain + self.x_cil = ig.allocate('random') + + self.fi_cil = [] + for i in range(self.n_subsets): + Ai_cil = MatrixOperator(Ai[i]) + bi_cil = VectorData(bi[i]) + self.fi_cil.append(LeastSquares(Ai_cil, bi_cil, c=1.0)) + + self.f = (1/self.n_subsets) * LeastSquares(self.Aop, b=self.bop, c=1.0) + self.f_subset_sum_function = SubsetSumFunction(self.fi_cil) + self.f_subset_sum_function_no_replacement = SubsetSumFunction(self.fi_cil, sampling="random", replacement=False) + self.f_subset_sum_function_sequential = SubsetSumFunction(self.fi_cil, 
sampling="sequential") + + def test_call_method(self): + + res1 = self.f(self.x_cil) + res2 = self.f_subset_sum_function(self.x_cil) + np.testing.assert_allclose(res1, res2) + + def test_full_gradient(self): + + res1 = self.f.gradient(self.x_cil) + res2 = self.f_subset_sum_function._full_gradient(self.x_cil) + np.testing.assert_allclose(res1.array, res2.array, atol=1e-3) + + def test_sampling(self): + + # check sequential selection + for i in range(self.n_subsets): + self.f_subset_sum_function_sequential.next_subset() + np.testing.assert_equal(self.f_subset_sum_function_sequential.subset_num, i) + + # check random selection with no replacement + epochs = 2 + choices = [[],[]] + for i in range(epochs): + for j in range(self.n_subsets): + self.f_subset_sum_function_no_replacement.next_subset() + choices[i].append(self.f_subset_sum_function_no_replacement.subset_num) + self.assertTrue( len(set(choices[0]))== len(set(choices[1]))) + + # check random selection with replacement + epochs = 2 + choices = [[],[]] + for i in range(epochs): + for j in range(self.n_subsets): + self.f_subset_sum_function.next_subset() + choices[i].append(self.f_subset_sum_function.subset_num) + self.assertTrue( len(set(choices[0]))!= len(set(choices[1]))) + + with self.assertRaises(NotImplementedError): + f=SubsetSumFunction(self.fi_cil, sampling="not implemented") + f.next_subset() + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs/source/optimisation.rst b/docs/source/optimisation.rst index e6eb3b8a75..962a758259 100644 --- a/docs/source/optimisation.rst +++ b/docs/source/optimisation.rst @@ -228,6 +228,10 @@ Base classes :members: :special-members: +.. autoclass:: cil.optimisation.functions.SubsetSumFunction + :members: + :special-members: + .. autoclass:: cil.optimisation.functions.ScaledFunction :members: :special-members: From 4f63f10a9be542970b498934292678c1ffcde50a Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Wed, 17 Aug 2022 21:16:18 +0100 Subject: [PATCH 03/19] Add SGDFunction --- .../cil/optimisation/functions/SGDFunction.py | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 Wrappers/Python/cil/optimisation/functions/SGDFunction.py diff --git a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py new file mode 100644 index 0000000000..f5667c0b99 --- /dev/null +++ b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# This work is part of the Core Imaging Library (CIL) developed by CCPi +# (Collaborative Computational Project in Tomographic Imaging), with +# substantial contributions by UKRI-STFC and University of Manchester. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cil.optimisation.functions import SubsetSumFunction + +class SGDFunction(SubsetSumFunction): + '''Class for use as objective function in gradient type algorithms to enable the use of subsets. 
+    The `gradient` method does not correspond to the mathematical gradient of a sum of functions,
+    but rather to a variance-reduced approximated gradient corresponding to the minibatch SGD algorithm.
+    More details can be found below, in the gradient method.
+    Parameters:
+    -----------
+    - (optional)
+    precond: function taking as input an integer (subset_num) and a DataContainer and outputting a DataContainer
+    serves as diagonal preconditioner
+    default None
+    - (optional)
+    gradient_initialisation_point: point to initialise the gradient of each subset
+    and the full gradient
+    default None
+
+    '''
+
+    def __init__(self, functions, sampling = "random", precond=None):
+
+        super(SGDFunction, self).__init__(functions, sampling = sampling)
+        self.precond = precond
+
+    def gradient(self, x, out=None):
+        """
+        Returns a vanilla stochastic gradient estimate, defined below.
+        For f = 1/num_subsets \sum_{i=1}^num_subsets f_i, the output is computed as follows:
+            - choose a subset j with function next_subset()
+            - compute the gradient of the j-th function at current iterate
+            - this gives an unbiased estimator of the gradient
+        """
+
+        # Select the next subset (uniformly at random by default). This generates self.subset_num
+        self.next_subset()
+
+        # Compute new gradient for current subset, store in ret
+        if out is None:
+            ret = 0.0 * x
+            self.functions[self.subset_num].gradient(x, out=ret)
+        else:
+            self.functions[self.subset_num].gradient(x, out=out)
+
+        # Apply preconditioning
+        if self.precond is not None:
+            if out is None:
+                ret.multiply(self.precond(self.subset_num,x),out=ret)
+            else:
+                out.multiply(self.precond(self.subset_num,x),out=out)
+
+        if out is None:
+            return ret
+        
\ No newline at end of file

From 5526597f4db9fb85c11e813b4adbff093e5c8c42 Mon Sep 17 00:00:00 2001
From: epapoutsellis
Date: Thu, 18 Aug 2022 13:42:27 +0100
Subject: [PATCH 04/19] add tests for SGDFunction

---
 .../cil/optimisation/functions/SGDFunction.py | 61 +++++++------
 Wrappers/Python/test/test_SGDFunction.py      | 87 +++++++++++++++++++
 2 files changed, 121 insertions(+), 27 deletions(-)
 create mode 100644 Wrappers/Python/test/test_SGDFunction.py

diff --git a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py
index f5667c0b99..ac431a8eae 100644
--- a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py
+++ b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py
@@ -18,41 +18,48 @@ from cil.optimisation.functions import SubsetSumFunction
 
 class SGDFunction(SubsetSumFunction):
-    '''Class for use as objective function in gradient type algorithms to enable the use of subsets.
-    The `gradient` method does not correspond to the mathematical gradient of a sum of functions,
-    but rather to a variance-reduced approximated gradient corresponding to the minibatch SGD algorithm.
-    More details can be found below, in the gradient method.
-    Parameters:
-    -----------
-    - (optional)
-    precond: function taking as input an integer (subset_num) and a DataContainer and outputting a DataContainer
-    serves as diagonal preconditioner
-    default None
-    - (optional)
-    gradient_initialisation_point: point to initialise the gradient of each subset
-    and the full gradient
-    default None
-
-    '''
+
+    r""" Stochastic Gradient Descent Function (SGDFunction)
+
+    The SDGFunction represents the objective function :math:`\frac{1}{n}\sum_{i=1}^{n}F_{i}(x)`, where
+    :math:`n` denotes the number of subsets. 
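+
+    A minimal usage sketch, assuming :code:`A_subsets` and :code:`b_subsets` hold the subset operators and data, and :code:`x` and :code:`step_size` come from the surrounding algorithm:
+
+    >>> f = SGDFunction([LeastSquares(Ai, b=bi) for Ai, bi in zip(A_subsets, b_subsets)])
+    >>> x_new = x - step_size * f.gradient(x)  # one stochastic gradient step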
+ Parameters: ----------- - - (optional) - precond: function taking into input an integer (subset_num) and a DataContainer and outputting a DataContainer - serves as diagonal preconditioner - default None - - (optional) - gradient_initialisation_point: point to initialize the gradient of each subset - and the full gradient - default None - - ''' + functions : list(functions) + A list of functions: :code:`[F_{1}, F_{2}, ..., F_{n}]`. Each function is assumed to be smooth function with an implemented :func:`~Function.gradient` method. + sampling : :obj:`string`, Default = :code:`random` + Selection process for each function in the list. It can be :code:`random` or :code:`sequential`. + replacement : :obj:`boolean`. Default = :code:`True` + The same subset can be selected when :code:`replacement=True`. + precond : DataContainer + A preconditioner, i.e, an array that multiplies the output from the gradient of the selected function :math:`\partial_F_{i}(x)`. + + Note + ---- + + The :meth:`~SGDFunction.gradient` computes the `gradient` of one function from the list :math:`[F_{1},\cdots,F_{n}]`, + + .. math:: \partial F_{i}(x) . + + The ith function is selected from the :meth:`~SubsetSumFunction.next_subset` method. + +""" - def __init__(self, functions, sampling = "random", precond=None): + def __init__(self, functions, sampling = "random", replacement = False, precond=None): - super(SGDFunction, self).__init__(functions, sampling = sampling) + super(SGDFunction, self).__init__(functions, sampling = sampling, replacement = replacement) self.precond = precond def gradient(self, x, out=None): - """ - Returns a vanilla stochastic gradient estimate, defined below. - For f = 1/num_subsets \sum_{i=1}^num_subsets f_i, the output is computed as follows: - - choose a subset j with function next_subset() - - compute the gradient of the j-th function at current iterate - - this gives an unbiased estimator of the gradient + + """ Returns the gradient of the selected function at :code:`x`. The function is selected using the :meth:`~SubsetSumFunction.next_subset` """ - # Select the next subset (uniformly at random by default). 
This generates self.subset_num + # Select the next subset self.next_subset() - # Compute new gradient for current subset, store in ret + # Compute new gradient for current subset if out is None: ret = 0.0 * x self.functions[self.subset_num].gradient(x, out=ret) @@ -60,11 +67,11 @@ def gradient(self, x, out=None): self.functions[self.subset_num].gradient(x, out=out) # Apply preconditioning - if self.precond is not None: + if self.precond is not None: if out is None: - ret.multiply(self.precond(self.subset_num,x),out=ret) + ret.multiply(self.precond,out=ret) else: - out.multiply(self.precond(self.subset_num,x),out=out) + out.multiply(self.precond,out=out) if out is None: return ret diff --git a/Wrappers/Python/test/test_SGDFunction.py b/Wrappers/Python/test/test_SGDFunction.py new file mode 100644 index 0000000000..9f5e5c3d8a --- /dev/null +++ b/Wrappers/Python/test/test_SGDFunction.py @@ -0,0 +1,87 @@ +import unittest +from utils import initialise_tests +from cil.optimisation.operators import MatrixOperator +from cil.optimisation.functions import LeastSquares, SubsetSumFunction, SGDFunction +from cil.framework import VectorData +import numpy as np + + +initialise_tests() + +class TestSGDFunction(unittest.TestCase): + + def setUp(self): + + np.random.seed(10) + n = 50 + m = 500 + self.n_subsets = 10 + + Anp = np.random.uniform(0,1, (m, n)).astype('float32') + xnp = np.random.uniform(0,1, (n,)).astype('float32') + bnp = Anp.dot(xnp) + + Ai = np.vsplit(Anp, self.n_subsets) + bi = [bnp[i:i+n] for i in range(0, m, n)] + + self.Aop = MatrixOperator(Anp) + self.bop = VectorData(bnp) + ig = self.Aop.domain + self.x_cil = ig.allocate('random') + + self.fi_cil = [] + for i in range(self.n_subsets): + Ai_cil = MatrixOperator(Ai[i]) + bi_cil = VectorData(bi[i]) + self.fi_cil.append(LeastSquares(Ai_cil, bi_cil, c=1.0)) + + self.f = (1/self.n_subsets) * LeastSquares(self.Aop, b=self.bop, c=1.0) + self.f_SGD = SGDFunction(self.fi_cil, sampling="sequential") + + precond = ig.allocate(1.0) + self.f_SGD_precond = SGDFunction(self.fi_cil, sampling="sequential", precond=precond) + + def test_gradient(self): + + out1 = self.x_cil.geometry.allocate() + out2 = self.x_cil.geometry.allocate() + + # No preconditioning + self.f_SGD.gradient(self.x_cil, out=out1) + + self.f_SGD[self.f_SGD.subset_num].gradient(self.x_cil, out=out2) + np.testing.assert_allclose(out1.array, out2.array, atol=1e-3) + + out3 = self.x_cil.geometry.allocate() + out4 = self.x_cil.geometry.allocate() + + # With preconditioning + self.f_SGD_precond.gradient(self.x_cil, out=out1) + + self.f_SGD_precond[self.f_SGD.subset_num].gradient(self.x_cil, out=out2) + out2.multiply(self.f_SGD_precond.precond, out=out2) + np.testing.assert_allclose(out1.array, out2.array, atol=1e-3) + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 277b85de9cc13638ff498ad0f478cda995ebad03 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Tue, 23 Aug 2022 17:45:57 +0100 Subject: [PATCH 05/19] precond as callable function --- .../cil/optimisation/functions/SGDFunction.py | 6 +++--- Wrappers/Python/test/test_SGDFunction.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py index ac431a8eae..241f8eb099 100644 --- a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py +++ b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py @@ -67,11 +67,11 @@ def gradient(self, x, out=None): 
self.functions[self.subset_num].gradient(x, out=out) # Apply preconditioning - if self.precond is not None: + if self.precond is not None: if out is None: - ret.multiply(self.precond,out=ret) + ret.multiply(self.precond(self.subset_num, x),out=ret) else: - out.multiply(self.precond,out=out) + out.multiply(self.precond(self.subset_num, x),out=out) if out is None: return ret diff --git a/Wrappers/Python/test/test_SGDFunction.py b/Wrappers/Python/test/test_SGDFunction.py index 9f5e5c3d8a..f4dc090bad 100644 --- a/Wrappers/Python/test/test_SGDFunction.py +++ b/Wrappers/Python/test/test_SGDFunction.py @@ -26,8 +26,8 @@ def setUp(self): self.Aop = MatrixOperator(Anp) self.bop = VectorData(bnp) - ig = self.Aop.domain - self.x_cil = ig.allocate('random') + self.ig = self.Aop.domain + self.x_cil = self.ig.allocate('random') self.fi_cil = [] for i in range(self.n_subsets): @@ -38,8 +38,8 @@ def setUp(self): self.f = (1/self.n_subsets) * LeastSquares(self.Aop, b=self.bop, c=1.0) self.f_SGD = SGDFunction(self.fi_cil, sampling="sequential") - precond = ig.allocate(1.0) - self.f_SGD_precond = SGDFunction(self.fi_cil, sampling="sequential", precond=precond) + self.precond = lambda i, x: 3./self.ig.allocate(2.5) + self.f_SGD_precond = SGDFunction(self.fi_cil, sampling="sequential", precond=self.precond) def test_gradient(self): @@ -56,11 +56,11 @@ def test_gradient(self): out4 = self.x_cil.geometry.allocate() # With preconditioning - self.f_SGD_precond.gradient(self.x_cil, out=out1) + self.f_SGD_precond.gradient(self.x_cil, out=out3) - self.f_SGD_precond[self.f_SGD.subset_num].gradient(self.x_cil, out=out2) - out2.multiply(self.f_SGD_precond.precond, out=out2) - np.testing.assert_allclose(out1.array, out2.array, atol=1e-3) + self.f_SGD_precond[self.f_SGD.subset_num].gradient(self.x_cil, out=out4) + out4*=self.precond(self.f_SGD_precond.subset_num, 3./self.ig.allocate(2.5)) + np.testing.assert_allclose(out3.array, out4.array, atol=1e-3) From a29fd55c61b6ebc587740963e88c5d064cc28595 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Tue, 23 Aug 2022 17:54:46 +0100 Subject: [PATCH 06/19] remove out is not None case --- .../cil/optimisation/functions/SGDFunction.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py index 241f8eb099..9828130274 100644 --- a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py +++ b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py @@ -51,7 +51,7 @@ def __init__(self, functions, sampling = "random", replacement = False, precond= super(SGDFunction, self).__init__(functions, sampling = sampling, replacement = replacement) self.precond = precond - def gradient(self, x, out=None): + def gradient(self, x, out): """ Returns the gradient of the selected function at :code:`x`. 
The function is selected using the :meth:`~SubsetSumFunction.next_subset` """ @@ -60,19 +60,9 @@ def gradient(self, x, out=None): self.next_subset() # Compute new gradient for current subset - if out is None: - ret = 0.0 * x - self.functions[self.subset_num].gradient(x, out=ret) - else: - self.functions[self.subset_num].gradient(x, out=out) + self.functions[self.subset_num].gradient(x, out=out) # Apply preconditioning if self.precond is not None: - if out is None: - ret.multiply(self.precond(self.subset_num, x),out=ret) - else: - out.multiply(self.precond(self.subset_num, x),out=out) - - if out is None: - return ret + out.multiply(self.precond(self.subset_num, x), out=out) \ No newline at end of file From c0881d01ce88d389459c7dc55ad8e0d79d24f409 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Thu, 25 Aug 2022 18:04:15 +0100 Subject: [PATCH 07/19] Improve cvxpy tests --- Wrappers/Python/test/test_SGDFunction.py | 80 +++++++++++++++--------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/Wrappers/Python/test/test_SGDFunction.py b/Wrappers/Python/test/test_SGDFunction.py index f4dc090bad..474f6a790c 100644 --- a/Wrappers/Python/test/test_SGDFunction.py +++ b/Wrappers/Python/test/test_SGDFunction.py @@ -2,65 +2,87 @@ from utils import initialise_tests from cil.optimisation.operators import MatrixOperator from cil.optimisation.functions import LeastSquares, SubsetSumFunction, SGDFunction +from cil.optimisation.algorithms import GD from cil.framework import VectorData import numpy as np - initialise_tests() +from utils import has_cvxpy + +if has_cvxpy: + import cvxpy + class TestSGDFunction(unittest.TestCase): - def setUp(self): + def setUp(self): + np.random.seed(10) n = 50 m = 500 - self.n_subsets = 10 + A = np.random.uniform(0,1, (m, n)).astype('float32') + b = (A.dot(np.random.randn(n)) + 0.1*np.random.randn(m)).astype('float32') + + self.Aop = MatrixOperator(A) + self.bop = VectorData(b) + Anp = np.random.uniform(0,1, (m, n)).astype('float32') xnp = np.random.uniform(0,1, (n,)).astype('float32') bnp = Anp.dot(xnp) - Ai = np.vsplit(Anp, self.n_subsets) - bi = [bnp[i:i+n] for i in range(0, m, n)] + # split data, operators, functions + self.n_subsets = 10 - self.Aop = MatrixOperator(Anp) - self.bop = VectorData(bnp) - self.ig = self.Aop.domain - self.x_cil = self.ig.allocate('random') + Ai = np.vsplit(A, self.n_subsets) + bi = [b[i:i+n] for i in range(0, m, n)] self.fi_cil = [] for i in range(self.n_subsets): - Ai_cil = MatrixOperator(Ai[i]) - bi_cil = VectorData(bi[i]) - self.fi_cil.append(LeastSquares(Ai_cil, bi_cil, c=1.0)) - - self.f = (1/self.n_subsets) * LeastSquares(self.Aop, b=self.bop, c=1.0) - self.f_SGD = SGDFunction(self.fi_cil, sampling="sequential") - - self.precond = lambda i, x: 3./self.ig.allocate(2.5) - self.f_SGD_precond = SGDFunction(self.fi_cil, sampling="sequential", precond=self.precond) + self.Ai_cil = MatrixOperator(Ai[i]) + self.bi_cil = VectorData(bi[i]) + self.fi_cil.append(LeastSquares(self.Ai_cil, self.bi_cil, c = 0.5)) + + self.F = LeastSquares(self.Aop, b=self.bop, c = 0.5/self.n_subsets) + + self.ig = self.Aop.domain + self.precond = lambda i, x: self.ig.allocate(1.) 
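+        # precond is a callable (subset_num, x) -> DataContainer, as SGDFunction expects; identity weights here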
+ self.F_SGD = SGDFunction(self.fi_cil, replacement = True, precond = self.precond) + + self.initial = self.ig.allocate() def test_gradient(self): - out1 = self.x_cil.geometry.allocate() - out2 = self.x_cil.geometry.allocate() + out1 = self.ig.allocate() + out2 = self.ig.allocate() + + x = self.ig.allocate('random') # No preconditioning - self.f_SGD.gradient(self.x_cil, out=out1) + self.F_SGD.gradient(x, out=out1) - self.f_SGD[self.f_SGD.subset_num].gradient(self.x_cil, out=out2) + self.F_SGD[self.F_SGD.subset_num].gradient(x, out=out2) np.testing.assert_allclose(out1.array, out2.array, atol=1e-3) - out3 = self.x_cil.geometry.allocate() - out4 = self.x_cil.geometry.allocate() + @unittest.skipUnless(has_cvxpy, "CVXpy not installed") + def test_with_cvxpy(self): + + u_cvxpy = cvxpy.Variable(self.ig.shape[0]) + objective = cvxpy.Minimize( 0.5/self.n_subsets * cvxpy.sum_squares(self.Aop.A @ u_cvxpy - self.bop.array)) + p = cvxpy.Problem(objective) + p.solve(verbose=True, solver=cvxpy.SCS, eps=1e-4) + + step_size = 1./self.F.L # Theoretical step size = 1./(16*self.F_SAG.L) + epochs = 100 + sgd = GD(initial = self.initial, objective_function = self.F_SGD, step_size = step_size, + max_iteration = epochs * self.n_subsets, + update_objective_interval = epochs * self.n_subsets) + sgd.run(verbose=0) - # With preconditioning - self.f_SGD_precond.gradient(self.x_cil, out=out3) + np.testing.assert_allclose(p.value, sgd.objective[-1], atol=1e-1) - self.f_SGD_precond[self.f_SGD.subset_num].gradient(self.x_cil, out=out4) - out4*=self.precond(self.f_SGD_precond.subset_num, 3./self.ig.allocate(2.5)) - np.testing.assert_allclose(out3.array, out4.array, atol=1e-3) + np.testing.assert_allclose(u_cvxpy.value, sgd.solution.array, atol=1e-1) From 1ff0bd200743042cd7241cfca124d687df94635e Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Tue, 11 Oct 2022 16:48:03 +0100 Subject: [PATCH 08/19] remove preconditioning and 1/n factor --- .../Python/cil/optimisation/functions/Function.py | 8 ++++---- .../Python/cil/optimisation/functions/SGDFunction.py | 12 +++--------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/Wrappers/Python/cil/optimisation/functions/Function.py b/Wrappers/Python/cil/optimisation/functions/Function.py index cb4730273e..97665c56de 100644 --- a/Wrappers/Python/cil/optimisation/functions/Function.py +++ b/Wrappers/Python/cil/optimisation/functions/Function.py @@ -330,7 +330,7 @@ class SubsetSumFunction(SumFunction): r"""SubsetSumFunction represents the following sum - .. math:: \frac{1}{n}\sum_{i=1}^{n} F_{i} = \frac{1}{n}(F_{1} + F_{2} + ... + F_{n}) + .. math:: \sum_{i=1}^{n} F_{i} = \frac{1}{n}(F_{1} + F_{2} + ... + F_{n}) where :math:`n` is the number of subsets. @@ -348,7 +348,7 @@ class SubsetSumFunction(SumFunction): .. math:: \sum_{i=1}^{n} F_{i}(x) = \sum_{i=1}^{n}\|A_{i} x - b_{i}\|^{2} - >>> f = SubsetSumFunction([LeastSquares(Ai, b=bi)]*n for Ai,bi in zip(A_subsets, b_subsets)) + >>> f = SubsetSumFunction([LeastSquares(Ai, b=bi)] for Ai,bi in zip(A_subsets, b_subsets)) """ @@ -375,11 +375,11 @@ def __call__(self, x): .. math:: \frac{1}{n}(F_{1} + F_{2} + ... + F_{n}))(x) = \frac{1}{n} * ( F_{1}(x) + F_{2}(x) + ... + F_{n}(x)) """ - return (1/self.num_subsets) * super(SubsetSumFunction, self).__call__(x) + return super(SubsetSumFunction, self).__call__(x) def _full_gradient(self, x, out=None): r""" Computes the full gradient at :code:`x`. It is the sum of all the gradients for each function. 
""" - return (1/self.num_subsets) * super(SubsetSumFunction, self).gradient(x, out=out) + return super(SubsetSumFunction, self).gradient(x, out=out) def gradient(self, x, out=None): """ Computes the gradient for each subset function at :code:`x`.""" diff --git a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py index 9828130274..867d4c9be6 100644 --- a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py +++ b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py @@ -21,7 +21,7 @@ class SGDFunction(SubsetSumFunction): r""" Stochastic Gradient Descent Function (SGDFunction) - The SDGFunction represents the objective function :math:`\frac{1}{n}\sum_{i=1}^{n}F_{i}(x)`, where + The SDGFunction represents the objective function :math:`\sum_{i=1}^{n}F_{i}(x)`, where :math:`n` denotes the number of subsets. Parameters: @@ -32,8 +32,6 @@ class SGDFunction(SubsetSumFunction): Selection process for each function in the list. It can be :code:`random` or :code:`sequential`. replacement : :obj:`boolean`. Default = :code:`True` The same subset can be selected when :code:`replacement=True`. - precond : DataContainer - A preconditioner, i.e, an array that multiplies the output from the gradient of the selected function :math:`\partial_F_{i}(x)`. Note ---- @@ -46,10 +44,9 @@ class SGDFunction(SubsetSumFunction): """ - def __init__(self, functions, sampling = "random", replacement = False, precond=None): + def __init__(self, functions, sampling = "random", replacement = False): super(SGDFunction, self).__init__(functions, sampling = sampling, replacement = replacement) - self.precond = precond def gradient(self, x, out): @@ -61,8 +58,5 @@ def gradient(self, x, out): # Compute new gradient for current subset self.functions[self.subset_num].gradient(x, out=out) - - # Apply preconditioning - if self.precond is not None: - out.multiply(self.precond(self.subset_num, x), out=out) + out*=self.num_subsets \ No newline at end of file From 1c015b162c6a809bbcaa3f5b8e365426eb3f44d3 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Thu, 13 Oct 2022 18:18:09 +0100 Subject: [PATCH 09/19] improve documentation and update tests --- .../cil/optimisation/functions/Function.py | 11 ++++-- Wrappers/Python/test/test_SGDFunction.py | 37 +++++++++---------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/Wrappers/Python/cil/optimisation/functions/Function.py b/Wrappers/Python/cil/optimisation/functions/Function.py index 97665c56de..abd0b8385f 100644 --- a/Wrappers/Python/cil/optimisation/functions/Function.py +++ b/Wrappers/Python/cil/optimisation/functions/Function.py @@ -370,19 +370,24 @@ def num_subsets(self): return self.num_functions def __call__(self, x): - r"""Returns the value of the averaged sum of functions at :math:`x`. - - .. math:: \frac{1}{n}(F_{1} + F_{2} + ... + F_{n}))(x) = \frac{1}{n} * ( F_{1}(x) + F_{2}(x) + ... + F_{n}(x)) + + r"""Returns the value of the sum of functions at :math:`x`. + + .. math:: \sum_{i=1}^{n}(F_{i}(x)) = (F_{1}(x) + F_{2}(x) + ... + F_{n}(x)) + """ return super(SubsetSumFunction, self).__call__(x) def _full_gradient(self, x, out=None): + r""" Computes the full gradient at :code:`x`. It is the sum of all the gradients for each function. 
""" return super(SubsetSumFunction, self).gradient(x, out=out) def gradient(self, x, out=None): + """ Computes the gradient for each subset function at :code:`x`.""" + raise NotImplemented def next_subset(self): diff --git a/Wrappers/Python/test/test_SGDFunction.py b/Wrappers/Python/test/test_SGDFunction.py index 474f6a790c..319853e84f 100644 --- a/Wrappers/Python/test/test_SGDFunction.py +++ b/Wrappers/Python/test/test_SGDFunction.py @@ -18,25 +18,24 @@ class TestSGDFunction(unittest.TestCase): def setUp(self): - np.random.seed(10) - n = 50 - m = 500 + # We test with LeastSquares problem with A^(mxn), n>m. (overparametrized/underdetermined) + # This is a specific setup, where SGD behaves nicely with fixed step size. + # In this setup, SGD becomes SGD-star, where SGD-star is proposed in https://arxiv.org/pdf/1905.11261.pdf (B.2 experiments) - A = np.random.uniform(0,1, (m, n)).astype('float32') - b = (A.dot(np.random.randn(n)) + 0.1*np.random.randn(m)).astype('float32') + np.random.seed(10) + n = 300 + m = 100 + A = np.random.normal(0,1, (m, n)).astype('float32') + b = np.random.normal(0,1, m).astype('float32') self.Aop = MatrixOperator(A) self.bop = VectorData(b) - Anp = np.random.uniform(0,1, (m, n)).astype('float32') - xnp = np.random.uniform(0,1, (n,)).astype('float32') - bnp = Anp.dot(xnp) - # split data, operators, functions self.n_subsets = 10 Ai = np.vsplit(A, self.n_subsets) - bi = [b[i:i+n] for i in range(0, m, n)] + bi = [b[i:i+int(m/self.n_subsets)] for i in range(0, m, int(m/self.n_subsets))] self.fi_cil = [] for i in range(self.n_subsets): @@ -44,11 +43,10 @@ def setUp(self): self.bi_cil = VectorData(bi[i]) self.fi_cil.append(LeastSquares(self.Ai_cil, self.bi_cil, c = 0.5)) - self.F = LeastSquares(self.Aop, b=self.bop, c = 0.5/self.n_subsets) + self.F = LeastSquares(self.Aop, b=self.bop, c = 0.5) self.ig = self.Aop.domain - self.precond = lambda i, x: self.ig.allocate(1.) 
- self.F_SGD = SGDFunction(self.fi_cil, replacement = True, precond = self.precond) + self.F_SGD = SGDFunction(self.fi_cil, replacement = True) self.initial = self.ig.allocate() @@ -59,29 +57,30 @@ def test_gradient(self): x = self.ig.allocate('random') - # No preconditioning self.F_SGD.gradient(x, out=out1) self.F_SGD[self.F_SGD.subset_num].gradient(x, out=out2) - np.testing.assert_allclose(out1.array, out2.array, atol=1e-3) + out2*=self.n_subsets + + np.testing.assert_allclose(out1.array, out2.array, atol=1e-4) @unittest.skipUnless(has_cvxpy, "CVXpy not installed") def test_with_cvxpy(self): u_cvxpy = cvxpy.Variable(self.ig.shape[0]) - objective = cvxpy.Minimize( 0.5/self.n_subsets * cvxpy.sum_squares(self.Aop.A @ u_cvxpy - self.bop.array)) + objective = cvxpy.Minimize( 0.5*cvxpy.sum_squares(self.Aop.A @ u_cvxpy - self.bop.array)) p = cvxpy.Problem(objective) p.solve(verbose=True, solver=cvxpy.SCS, eps=1e-4) - step_size = 1./self.F.L # Theoretical step size = 1./(16*self.F_SAG.L) - epochs = 100 + step_size = 1./self.F_SGD.L + + epochs = 200 sgd = GD(initial = self.initial, objective_function = self.F_SGD, step_size = step_size, max_iteration = epochs * self.n_subsets, update_objective_interval = epochs * self.n_subsets) sgd.run(verbose=0) np.testing.assert_allclose(p.value, sgd.objective[-1], atol=1e-1) - np.testing.assert_allclose(u_cvxpy.value, sgd.solution.array, atol=1e-1) From 8c3e8a2e8a2acd7a318d959329562dec1ca5c25f Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Thu, 13 Oct 2022 19:19:56 +0100 Subject: [PATCH 10/19] fix test SubsetSumFunction --- Wrappers/Python/test/test_SubsetSumFunction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Wrappers/Python/test/test_SubsetSumFunction.py b/Wrappers/Python/test/test_SubsetSumFunction.py index 6d2234497a..34933bcf59 100644 --- a/Wrappers/Python/test/test_SubsetSumFunction.py +++ b/Wrappers/Python/test/test_SubsetSumFunction.py @@ -35,7 +35,7 @@ def setUp(self): bi_cil = VectorData(bi[i]) self.fi_cil.append(LeastSquares(Ai_cil, bi_cil, c=1.0)) - self.f = (1/self.n_subsets) * LeastSquares(self.Aop, b=self.bop, c=1.0) + self.f = LeastSquares(self.Aop, b=self.bop, c=1.0) self.f_subset_sum_function = SubsetSumFunction(self.fi_cil) self.f_subset_sum_function_no_replacement = SubsetSumFunction(self.fi_cil, sampling="random", replacement=False) self.f_subset_sum_function_sequential = SubsetSumFunction(self.fi_cil, sampling="sequential") From 136a2fe0f07d72a63eb3a8c9f4ba27cc0c7f9cfe Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Wed, 19 Oct 2022 10:15:11 +0100 Subject: [PATCH 11/19] improve docs, add random and single suffle --- .../cil/optimisation/functions/Function.py | 57 +++++++++++++------ .../cil/optimisation/functions/SGDFunction.py | 4 +- .../Python/test/test_SubsetSumFunction.py | 50 +++++++++------- 3 files changed, 74 insertions(+), 37 deletions(-) diff --git a/Wrappers/Python/cil/optimisation/functions/Function.py b/Wrappers/Python/cil/optimisation/functions/Function.py index abd0b8385f..21d55e20b0 100644 --- a/Wrappers/Python/cil/optimisation/functions/Function.py +++ b/Wrappers/Python/cil/optimisation/functions/Function.py @@ -330,7 +330,7 @@ class SubsetSumFunction(SumFunction): r"""SubsetSumFunction represents the following sum - .. math:: \sum_{i=1}^{n} F_{i} = \frac{1}{n}(F_{1} + F_{2} + ... + F_{n}) + .. math:: \sum_{i=1}^{n} F_{i} = (F_{1} + F_{2} + ... + F_{n}) where :math:`n` is the number of subsets. 
@@ -342,6 +342,10 @@ class SubsetSumFunction(SumFunction): Selection process for each function in the list. It can be :code:`random` or :code:`sequential`. replacement : :obj:`boolean`. Default = :code:`True` The same subset can be selected when :code:`replacement=True`. + suffle : :obj:`string`. Default = :code:`random`. + This is only for the :code:`replacement=False` case. For :code:`suffle="single"`, we permute the list only once. + For :code:`suffle="random"`, we permute the list of subsets after one epoch. + Example ------- @@ -355,16 +359,27 @@ class SubsetSumFunction(SumFunction): def __init__(self, functions, sampling = "random", replacement = True, - subset_init = -1): + suffle = "random"): super(SubsetSumFunction, self).__init__(*functions) - self.subset_num = subset_init - self.data_passes = [0] - self.sampling = sampling + self.sampling = sampling self.replacement = replacement - self.enumerate_functions = list(range(self.num_subsets)) - + self.suffle = suffle + self.data_passes = [0] + self.subsets_used = [] + self.subset_num = -1 + self.index = 0 + + if self.replacement is False: + + # create a list of subsets without replacement, first permutation + self.list_of_subsets = np.random.choice(range(self.num_subsets),self.num_subsets, replace=False) + + if self.suffle not in ["random","single"]: + raise NotImplementedError("Only {} and {} are implemented for the replacement=False case.".format("random","single")) + + @property def num_subsets(self): return self.num_functions @@ -392,26 +407,36 @@ def gradient(self, x, out=None): def next_subset(self): - """ Selects the next subset according to :code:`sampling`. - """ - if self.sampling=="random" : if self.replacement is False: - self.subset_num = np.random.choice(self.enumerate_functions) - self.enumerate_functions.remove(self.subset_num) - if len(self.enumerate_functions)==0: - self.enumerate_functions = list(range(self.num_subsets)) + + # list of subsets already permuted + self.subset_num = self.list_of_subsets[self.index] + self.index+=1 + + if self.index == self.num_subsets: + self.index=0 + + # For random suffle, at the end of each epoch, we permute the list again + if self.suffle=="random": + self.list_of_subsets = np.random.choice(range(self.num_subsets),self.num_subsets, replace=False) + else: - self.subset_num = np.random.randint(0, self.num_subsets) - elif self.sampling=="sequential": + # at each iteration (not epoch) a subset is randomly selected + self.subset_num = np.random.randint(0, self.num_subsets) + + elif self.sampling=="sequential": + if self.subset_num + 1 >= self.num_subsets: self.subset_num = 0 else: self.subset_num += 1 else: raise NotImplementedError("Only {} and {} are implemented at the moment.".format("random","sequential")) + + self.subsets_used.append(self.subset_num) class ScaledFunction(Function): diff --git a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py index 867d4c9be6..eb5a32cfde 100644 --- a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py +++ b/Wrappers/Python/cil/optimisation/functions/SGDFunction.py @@ -44,9 +44,9 @@ class SGDFunction(SubsetSumFunction): """ - def __init__(self, functions, sampling = "random", replacement = False): + def __init__(self, functions, sampling = "random", replacement = False, suffle="random"): - super(SGDFunction, self).__init__(functions, sampling = sampling, replacement = replacement) + super(SGDFunction, self).__init__(functions, sampling = sampling, replacement = replacement, 
suffle=suffle) def gradient(self, x, out): diff --git a/Wrappers/Python/test/test_SubsetSumFunction.py b/Wrappers/Python/test/test_SubsetSumFunction.py index 34933bcf59..58289f0f7d 100644 --- a/Wrappers/Python/test/test_SubsetSumFunction.py +++ b/Wrappers/Python/test/test_SubsetSumFunction.py @@ -36,10 +36,12 @@ def setUp(self): self.fi_cil.append(LeastSquares(Ai_cil, bi_cil, c=1.0)) self.f = LeastSquares(self.Aop, b=self.bop, c=1.0) - self.f_subset_sum_function = SubsetSumFunction(self.fi_cil) - self.f_subset_sum_function_no_replacement = SubsetSumFunction(self.fi_cil, sampling="random", replacement=False) + self.f_subset_sum_function = SubsetSumFunction(self.fi_cil) # default with replacement self.f_subset_sum_function_sequential = SubsetSumFunction(self.fi_cil, sampling="sequential") + self.f_subset_sum_function_random_suffle = SubsetSumFunction(self.fi_cil, sampling="random", replacement=False, suffle="random") + self.f_subset_sum_function_single_suffle = SubsetSumFunction(self.fi_cil, sampling="random", replacement=False, suffle="single") + def test_call_method(self): res1 = self.f(self.x_cil) @@ -52,36 +54,46 @@ def test_full_gradient(self): res2 = self.f_subset_sum_function._full_gradient(self.x_cil) np.testing.assert_allclose(res1.array, res2.array, atol=1e-3) - def test_sampling(self): + def test_sampling_sequential(self): # check sequential selection for i in range(self.n_subsets): self.f_subset_sum_function_sequential.next_subset() np.testing.assert_equal(self.f_subset_sum_function_sequential.subset_num, i) - # check random selection with no replacement - epochs = 2 - choices = [[],[]] - for i in range(epochs): - for j in range(self.n_subsets): - self.f_subset_sum_function_no_replacement.next_subset() - choices[i].append(self.f_subset_sum_function_no_replacement.subset_num) - self.assertTrue( len(set(choices[0]))== len(set(choices[1]))) + def test_sampling_random_with_replacement(self): # check random selection with replacement - epochs = 2 - choices = [[],[]] + epochs = 3 + choices = [] for i in range(epochs): for j in range(self.n_subsets): self.f_subset_sum_function.next_subset() - choices[i].append(self.f_subset_sum_function.subset_num) - self.assertTrue( len(set(choices[0]))!= len(set(choices[1]))) + choices.append(self.f_subset_sum_function.subset_num) + self.assertTrue( choices == self.f_subset_sum_function.subsets_used) + + def test_sampling_random_without_replacement_random_suffle(self): + + # check random selection with no replacement + epochs = 3 + choices = [] + for i in range(epochs): + for j in range(self.n_subsets): + self.f_subset_sum_function_random_suffle.next_subset() + choices.append(self.f_subset_sum_function_random_suffle.subset_num) + self.assertTrue( choices == self.f_subset_sum_function_random_suffle.subsets_used) + + def test_sampling_random_without_replacement_single_suffle(self): - with self.assertRaises(NotImplementedError): - f=SubsetSumFunction(self.fi_cil, sampling="not implemented") - f.next_subset() + # check random selection with no replacement + epochs = 3 + choices = [] + for i in range(epochs): + for j in range(self.n_subsets): + self.f_subset_sum_function_single_suffle.next_subset() + choices.append(self.f_subset_sum_function_single_suffle.subset_num) + self.assertTrue( choices == self.f_subset_sum_function_single_suffle.subsets_used) - From f06c6038968660b49d0a6695cb91970596ed3e05 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Mon, 24 Oct 2022 19:00:27 +0100 Subject: [PATCH 12/19] rename from SGD to SG --- .../functions/{SGDFunction.py 
=> SGFunction.py} | 4 ++-- .../Python/cil/optimisation/functions/__init__.py | 2 +- .../{test_SGDFunction.py => test_SGFunction.py} | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) rename Wrappers/Python/cil/optimisation/functions/{SGDFunction.py => SGFunction.py} (94%) rename Wrappers/Python/test/{test_SGDFunction.py => test_SGFunction.py} (89%) diff --git a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py b/Wrappers/Python/cil/optimisation/functions/SGFunction.py similarity index 94% rename from Wrappers/Python/cil/optimisation/functions/SGDFunction.py rename to Wrappers/Python/cil/optimisation/functions/SGFunction.py index eb5a32cfde..134689e205 100644 --- a/Wrappers/Python/cil/optimisation/functions/SGDFunction.py +++ b/Wrappers/Python/cil/optimisation/functions/SGFunction.py @@ -17,7 +17,7 @@ from cil.optimisation.functions import SubsetSumFunction -class SGDFunction(SubsetSumFunction): +class SGFunction(SubsetSumFunction): r""" Stochastic Gradient Descent Function (SGDFunction) @@ -46,7 +46,7 @@ class SGDFunction(SubsetSumFunction): def __init__(self, functions, sampling = "random", replacement = False, suffle="random"): - super(SGDFunction, self).__init__(functions, sampling = sampling, replacement = replacement, suffle=suffle) + super(SGFunction, self).__init__(functions, sampling = sampling, replacement = replacement, suffle=suffle) def gradient(self, x, out): diff --git a/Wrappers/Python/cil/optimisation/functions/__init__.py b/Wrappers/Python/cil/optimisation/functions/__init__.py index a5e93622b5..035c55e736 100644 --- a/Wrappers/Python/cil/optimisation/functions/__init__.py +++ b/Wrappers/Python/cil/optimisation/functions/__init__.py @@ -34,4 +34,4 @@ from .KullbackLeibler import KullbackLeibler from .Rosenbrock import Rosenbrock from .TotalVariation import TotalVariation -from .SGDFunction import SGDFunction +from .SGFunction import SGFunction diff --git a/Wrappers/Python/test/test_SGDFunction.py b/Wrappers/Python/test/test_SGFunction.py similarity index 89% rename from Wrappers/Python/test/test_SGDFunction.py rename to Wrappers/Python/test/test_SGFunction.py index 319853e84f..1ac463178b 100644 --- a/Wrappers/Python/test/test_SGDFunction.py +++ b/Wrappers/Python/test/test_SGFunction.py @@ -1,7 +1,7 @@ import unittest from utils import initialise_tests from cil.optimisation.operators import MatrixOperator -from cil.optimisation.functions import LeastSquares, SubsetSumFunction, SGDFunction +from cil.optimisation.functions import LeastSquares, SubsetSumFunction, SGFunction from cil.optimisation.algorithms import GD from cil.framework import VectorData import numpy as np @@ -13,7 +13,7 @@ if has_cvxpy: import cvxpy -class TestSGDFunction(unittest.TestCase): +class TestSGFunction(unittest.TestCase): def setUp(self): @@ -46,7 +46,7 @@ def setUp(self): self.F = LeastSquares(self.Aop, b=self.bop, c = 0.5) self.ig = self.Aop.domain - self.F_SGD = SGDFunction(self.fi_cil, replacement = True) + self.F_SG = SGFunction(self.fi_cil, replacement = True) self.initial = self.ig.allocate() @@ -57,9 +57,9 @@ def test_gradient(self): x = self.ig.allocate('random') - self.F_SGD.gradient(x, out=out1) + self.F_SG.gradient(x, out=out1) - self.F_SGD[self.F_SGD.subset_num].gradient(x, out=out2) + self.F_SG[self.F_SG.subset_num].gradient(x, out=out2) out2*=self.n_subsets np.testing.assert_allclose(out1.array, out2.array, atol=1e-4) @@ -72,10 +72,10 @@ def test_with_cvxpy(self): p = cvxpy.Problem(objective) p.solve(verbose=True, solver=cvxpy.SCS, eps=1e-4) - step_size = 
1./self.F_SGD.L + step_size = 1./self.F_SG.L epochs = 200 - sgd = GD(initial = self.initial, objective_function = self.F_SGD, step_size = step_size, + sgd = GD(initial = self.initial, objective_function = self.F_SG, step_size = step_size, max_iteration = epochs * self.n_subsets, update_objective_interval = epochs * self.n_subsets) sgd.run(verbose=0) From f867d19bc66fa7bf72984933f6c53af766be72e9 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Tue, 25 Oct 2022 08:19:55 +0100 Subject: [PATCH 13/19] rename to full_gradient --- Wrappers/Python/cil/optimisation/functions/Function.py | 2 +- Wrappers/Python/test/test_SubsetSumFunction.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Wrappers/Python/cil/optimisation/functions/Function.py b/Wrappers/Python/cil/optimisation/functions/Function.py index 21d55e20b0..f1a6479cdb 100644 --- a/Wrappers/Python/cil/optimisation/functions/Function.py +++ b/Wrappers/Python/cil/optimisation/functions/Function.py @@ -394,7 +394,7 @@ def __call__(self, x): return super(SubsetSumFunction, self).__call__(x) - def _full_gradient(self, x, out=None): + def full_gradient(self, x, out=None): r""" Computes the full gradient at :code:`x`. It is the sum of all the gradients for each function. """ return super(SubsetSumFunction, self).gradient(x, out=out) diff --git a/Wrappers/Python/test/test_SubsetSumFunction.py b/Wrappers/Python/test/test_SubsetSumFunction.py index 58289f0f7d..bfe659b568 100644 --- a/Wrappers/Python/test/test_SubsetSumFunction.py +++ b/Wrappers/Python/test/test_SubsetSumFunction.py @@ -51,7 +51,7 @@ def test_call_method(self): def test_full_gradient(self): res1 = self.f.gradient(self.x_cil) - res2 = self.f_subset_sum_function._full_gradient(self.x_cil) + res2 = self.f_subset_sum_function.full_gradient(self.x_cil) np.testing.assert_allclose(res1.array, res2.array, atol=1e-3) def test_sampling_sequential(self): From 5a0aaf02981b1191db8443191f57efc15228ec34 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Fri, 18 Nov 2022 14:56:59 +0000 Subject: [PATCH 14/19] add optimisation utilities, FunctionNumberGenerator --- .../utilities/FunctionNumberGenerator.py | 95 +++++++++++++++++++ .../cil/optimisation/utilities/__init__.py | 18 ++++ 2 files changed, 113 insertions(+) create mode 100644 Wrappers/Python/cil/optimisation/utilities/FunctionNumberGenerator.py create mode 100644 Wrappers/Python/cil/optimisation/utilities/__init__.py diff --git a/Wrappers/Python/cil/optimisation/utilities/FunctionNumberGenerator.py b/Wrappers/Python/cil/optimisation/utilities/FunctionNumberGenerator.py new file mode 100644 index 0000000000..8dcb0e68d4 --- /dev/null +++ b/Wrappers/Python/cil/optimisation/utilities/FunctionNumberGenerator.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# This work is part of the Core Imaging Library (CIL) developed by CCPi +# (Collaborative Computational Project in Tomographic Imaging), with +# substantial contributions by UKRI-STFC and University of Manchester. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import numpy as np
+
+def FunctionNumberGenerator(num_functions, sampling_method = "random"):
+
+    r""" FunctionNumberGenerator
+
+    The FunctionNumberGenerator randomly selects a number from the list of function indices :code:`0, 1, ..., num_functions - 1`.
+
+    Parameters:
+    -----------
+    num_functions : :obj:`int`
+        The total number of functions used in our stochastic estimators.
+    sampling_method : :obj:`string`, Default = :code:`random`
+        Selection process for each function in the list. It can be :code:`random`, :code:`random_permutation`, :code:`fixed_permutation`.
+        - :code:`random`: Every function is selected randomly with replacement.
+        - :code:`random_permutation`: Every function is selected randomly without replacement. After selecting all the functions in the list, i.e., after one epoch, the list is randomly permuted.
+        - :code:`fixed_permutation`: Every function is selected randomly without replacement and the list of functions is permuted only once.
+
+    Example
+    -------
+
+    >>> fng = FunctionNumberGenerator(10)
+    >>> print(next(fng))
+
+    >>> number_of_functions = 10
+    >>> fng = FunctionNumberGenerator(number_of_functions, sampling_method="fixed_permutation")
+    >>> epochs = 2
+    >>> generated_numbers=[print(next(fng), end=' ') for _ in range(epochs*number_of_functions)]
+
+    """
+
+    if not isinstance(num_functions, int):
+        raise ValueError("An integer is required for `num_functions`, {} was passed.".format(num_functions))
+
+    # Accepted sampling_methods
+    default_sampling_methods = ["random", "random_permutation", "fixed_permutation"]
+
+    if sampling_method not in default_sampling_methods:
+        raise NotImplementedError("Only {} are implemented at the moment.".format(default_sampling_methods))
+
+    replacement = False
+    if sampling_method == "random":
+        replacement = True
+    else:
+        if sampling_method == "random_permutation":
+            shuffle = "random"
+        else:
+            shuffle = "single"
+
+    function_num = -1
+    index = 0
+
+    if replacement is False:
+        # create a list of functions without replacement, first permutation
+        list_of_functions = np.random.choice(range(num_functions), num_functions, replace=False)
+
+    while True:
+
+        if replacement is False:
+
+            # list of functions already permuted
+            function_num = list_of_functions[index]
+            index += 1
+
+            if index == num_functions:
+                index = 0
+
+                # for "random" shuffle, at the end of each epoch, we permute the list again
+                if shuffle == "random":
+                    list_of_functions = np.random.choice(range(num_functions), num_functions, replace=False)
+
+        else:
+
+            # at each iteration (not epoch) a function is randomly selected
+            function_num = np.random.randint(0, num_functions)
+
+        yield function_num
+
diff --git a/Wrappers/Python/cil/optimisation/utilities/__init__.py b/Wrappers/Python/cil/optimisation/utilities/__init__.py
new file mode 100644
index 0000000000..4c5d41ada7
--- /dev/null
+++ b/Wrappers/Python/cil/optimisation/utilities/__init__.py
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+# This work is part of the Core Imaging Library (CIL) developed by CCPi
+# (Collaborative Computational Project in Tomographic Imaging), with
+# substantial contributions by UKRI-STFC and University of Manchester.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .FunctionNumberGenerator import FunctionNumberGenerator From 8f4d25cc8704ef30ac144d0eda48225056eecee2 Mon Sep 17 00:00:00 2001 From: epapoutsellis Date: Fri, 18 Nov 2022 14:58:13 +0000 Subject: [PATCH 15/19] rename SubsetSumFunction to ApproximateGradientSumFunction, use FunctionNumberGenerator --- .../cil/optimisation/functions/Function.py | 115 ++++++----------- .../cil/optimisation/functions/SGFunction.py | 28 ++--- .../cil/optimisation/functions/__init__.py | 3 +- .../test_ApproximateGradientSumFunction.py | 118 ++++++++++++++++++ Wrappers/Python/test/test_SGFunction.py | 9 +- 5 files changed, 178 insertions(+), 95 deletions(-) create mode 100644 Wrappers/Python/test/test_ApproximateGradientSumFunction.py diff --git a/Wrappers/Python/cil/optimisation/functions/Function.py b/Wrappers/Python/cil/optimisation/functions/Function.py index f1a6479cdb..c9824cb3e1 100644 --- a/Wrappers/Python/cil/optimisation/functions/Function.py +++ b/Wrappers/Python/cil/optimisation/functions/Function.py @@ -20,6 +20,7 @@ from numbers import Number import numpy as np from functools import reduce +from cil.optimisation.utilities import FunctionNumberGenerator class Function(object): @@ -326,63 +327,50 @@ def __add__(self, other): else: return super(SumFunction, self).__add__(other) -class SubsetSumFunction(SumFunction): +class ApproximateGradientSumFunction(SumFunction): - r"""SubsetSumFunction represents the following sum + r"""ApproximateGradientSumFunction represents the following sum .. math:: \sum_{i=1}^{n} F_{i} = (F_{1} + F_{2} + ... + F_{n}) - where :math:`n` is the number of subsets. + where :math:`n` is the number of functions. Parameters: ----------- functions : list(functions) A list of functions: :code:`[F_{1}, F_{2}, ..., F_{n}]`. Each function is assumed to be smooth function with an implemented :func:`~Function.gradient` method. - sampling : :obj:`string`, Default = :code:`random` - Selection process for each function in the list. It can be :code:`random` or :code:`sequential`. - replacement : :obj:`boolean`. Default = :code:`True` - The same subset can be selected when :code:`replacement=True`. - suffle : :obj:`string`. Default = :code:`random`. - This is only for the :code:`replacement=False` case. For :code:`suffle="single"`, we permute the list only once. - For :code:`suffle="random"`, we permute the list of subsets after one epoch. - + selection : :obj:`string`, Default = :code:`random`. It can be :code:`random`, :code:`random_permutation`, :code:`fixed_permutation`. + Selection method for each function in the list. + - :code:`random`: Every function is selected randomly with replacement. + - :code:`random_permutation`: Every function is selected randomly without replacement. After selecting all the functions in the list, i.e., after one epoch, the list is randomly permuted. + - :code:`fixed_permuation`: Every function is selected randomly without replacement and the list of function is permuted only once. Example ------- .. 
     .. math:: \sum_{i=1}^{n} F_{i}(x) = \sum_{i=1}^{n}\|A_{i} x - b_{i}\|^{2}
 
-    >>> f = SubsetSumFunction([LeastSquares(Ai, b=bi)] for Ai,bi in zip(A_subsets, b_subsets))
+    >>> list_of_functions = [LeastSquares(Ai, b=bi) for Ai, bi in zip(A_subsets, b_subsets)]
+    >>> f = ApproximateGradientSumFunction(list_of_functions)
+
+    >>> list_of_functions = [LeastSquares(Ai, b=bi) for Ai, bi in zip(A_subsets, b_subsets)]
+    >>> fng = FunctionNumberGenerator(len(list_of_functions), sampling_method="fixed_permutation")
+    >>> f = ApproximateGradientSumFunction(list_of_functions, fng)
 
     """
 
-    def __init__(self, functions,
-                 sampling = "random",
-                 replacement = True,
-                 suffle = "random"):
-
-        super(SubsetSumFunction, self).__init__(*functions)
-
-        self.sampling = sampling
-        self.replacement = replacement
-        self.suffle = suffle
-        self.data_passes = [0]
-        self.subsets_used = []
-        self.subset_num = -1
-        self.index = 0
-
-        if self.replacement is False:
-
-            # create a list of subsets without replacement, first permutation
-            self.list_of_subsets = np.random.choice(range(self.num_subsets),self.num_subsets, replace=False)
-
-            if self.suffle not in ["random","single"]:
-                raise NotImplementedError("Only {} and {} are implemented for the replacement=False case.".format("random","single"))
-
-    @property
-    def num_subsets(self):
-        return self.num_functions
+    def __init__(self, functions, selection="random"):
+
+        super(ApproximateGradientSumFunction, self).__init__(*functions)
+
+        self.functions_used = []
+        self.functions_passes = [0]
+
+        # a string selects the built-in FunctionNumberGenerator; otherwise a generator is expected
+        if isinstance(selection, str):
+            self.selection = FunctionNumberGenerator(len(functions), sampling_method=selection)
+        else:
+            self.selection = selection
 
     def __call__(self, x):
 
@@ -392,51 +380,28 @@ def __call__(self, x):
 
         """
 
-        return super(SubsetSumFunction, self).__call__(x)
+        return super(ApproximateGradientSumFunction, self).__call__(x)
 
     def full_gradient(self, x, out=None):
 
         r""" Computes the full gradient at :code:`x`. It is the sum of the gradients of all the functions.
""" - return super(SubsetSumFunction, self).gradient(x, out=out) + return super(ApproximateGradientSumFunction, self).gradient(x, out=out) - def gradient(self, x, out=None): - - """ Computes the gradient for each subset function at :code:`x`.""" + def approximate_gradient(self, function_num, x, out=None): + """ Computes the gradient for each selected function at :code:`x`.""" raise NotImplemented + + def gradient(self, x, out=None): + + self.next_function_num() + return self.approximate_gradient(self.function_num, x, out=out) - def next_subset(self): + def next_function_num(self): - if self.sampling=="random" : - - if self.replacement is False: - - # list of subsets already permuted - self.subset_num = self.list_of_subsets[self.index] - self.index+=1 - - if self.index == self.num_subsets: - self.index=0 - - # For random suffle, at the end of each epoch, we permute the list again - if self.suffle=="random": - self.list_of_subsets = np.random.choice(range(self.num_subsets),self.num_subsets, replace=False) - - else: - - # at each iteration (not epoch) a subset is randomly selected - self.subset_num = np.random.randint(0, self.num_subsets) - - elif self.sampling=="sequential": - - if self.subset_num + 1 >= self.num_subsets: - self.subset_num = 0 - else: - self.subset_num += 1 - else: - raise NotImplementedError("Only {} and {} are implemented at the moment.".format("random","sequential")) - - self.subsets_used.append(self.subset_num) + # Select the next function using the selection:=function_number_generator + self.function_num = next(self.selection) + self.functions_used.append(self.function_num) class ScaledFunction(Function): diff --git a/Wrappers/Python/cil/optimisation/functions/SGFunction.py b/Wrappers/Python/cil/optimisation/functions/SGFunction.py index 134689e205..38725d2ea3 100644 --- a/Wrappers/Python/cil/optimisation/functions/SGFunction.py +++ b/Wrappers/Python/cil/optimisation/functions/SGFunction.py @@ -15,13 +15,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cil.optimisation.functions import SubsetSumFunction +from cil.optimisation.functions import ApproximateGradientSumFunction -class SGFunction(SubsetSumFunction): +class SGFunction(ApproximateGradientSumFunction): - r""" Stochastic Gradient Descent Function (SGDFunction) + r""" Stochastic Gradient Function (SGFunction) - The SDGFunction represents the objective function :math:`\sum_{i=1}^{n}F_{i}(x)`, where + The SGFunction represents the objective function :math:`\sum_{i=1}^{n}F_{i}(x)`, where :math:`n` denotes the number of subsets. Parameters: @@ -36,7 +36,7 @@ class SGFunction(SubsetSumFunction): Note ---- - The :meth:`~SGDFunction.gradient` computes the `gradient` of one function from the list :math:`[F_{1},\cdots,F_{n}]`, + The :meth:`~SGFunction.gradient` computes the `gradient` of one function from the list :math:`[F_{1},\cdots,F_{n}]`, .. math:: \partial F_{i}(x) . @@ -44,19 +44,15 @@ class SGFunction(SubsetSumFunction): """ - def __init__(self, functions, sampling = "random", replacement = False, suffle="random"): + def __init__(self, functions, selection="random"): - super(SGFunction, self).__init__(functions, sampling = sampling, replacement = replacement, suffle=suffle) + super(SGFunction, self).__init__(functions, selection) - def gradient(self, x, out): + def approximate_gradient(self,subset_num, x, out): """ Returns the gradient of the selected function at :code:`x`. 
diff --git a/Wrappers/Python/cil/optimisation/functions/SGFunction.py b/Wrappers/Python/cil/optimisation/functions/SGFunction.py
index 134689e205..38725d2ea3 100644
--- a/Wrappers/Python/cil/optimisation/functions/SGFunction.py
+++ b/Wrappers/Python/cil/optimisation/functions/SGFunction.py
@@ -15,13 +15,13 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-from cil.optimisation.functions import SubsetSumFunction
+from cil.optimisation.functions import ApproximateGradientSumFunction
 
-class SGFunction(SubsetSumFunction):
+class SGFunction(ApproximateGradientSumFunction):
 
-    r""" Stochastic Gradient Descent Function (SGDFunction)
+    r""" Stochastic Gradient Function (SGFunction)
 
-    The SDGFunction represents the objective function :math:`\sum_{i=1}^{n}F_{i}(x)`, where
-    :math:`n` denotes the number of subsets.
+    The SGFunction represents the objective function :math:`\sum_{i=1}^{n}F_{i}(x)`, where
+    :math:`n` denotes the number of functions.
 
     Parameters:
@@ -36,7 +36,7 @@ class SGFunction(SubsetSumFunction):
 
     Note
     ----
-    The :meth:`~SGDFunction.gradient` computes the `gradient` of one function from the list :math:`[F_{1},\cdots,F_{n}]`,
+    The :meth:`~SGFunction.gradient` computes the `gradient` of one function from the list :math:`[F_{1},\cdots,F_{n}]`,
 
     .. math:: \partial F_{i}(x) .
 
@@ -44,19 +44,15 @@
 
     """
 
-    def __init__(self, functions, sampling = "random", replacement = False, suffle="random"):
+    def __init__(self, functions, selection="random"):
 
-        super(SGFunction, self).__init__(functions, sampling = sampling, replacement = replacement, suffle=suffle)
+        super(SGFunction, self).__init__(functions, selection)
 
-    def gradient(self, x, out):
+    def approximate_gradient(self, function_num, x, out):
 
         """ Returns the gradient of the selected function at :code:`x`.
-        The function is selected using the :meth:`~SubsetSumFunction.next_subset` method.
+        The function is selected using the :meth:`~ApproximateGradientSumFunction.next_function_num` method.
         """
-
-        # Select the next subset
-        self.next_subset()
-
-        # Compute new gradient for current subset
-        self.functions[self.subset_num].gradient(x, out=out)
-        out*=self.num_subsets
+        # Compute new gradient for the currently selected function
+        self.functions[function_num].gradient(x, out=out)
+        out *= self.num_functions
\ No newline at end of file
diff --git a/Wrappers/Python/cil/optimisation/functions/__init__.py b/Wrappers/Python/cil/optimisation/functions/__init__.py
index 035c55e736..8482113e14 100644
--- a/Wrappers/Python/cil/optimisation/functions/__init__.py
+++ b/Wrappers/Python/cil/optimisation/functions/__init__.py
@@ -17,7 +17,7 @@
 
 from .Function import Function
 from .Function import SumFunction
-from .Function import SubsetSumFunction
+from .Function import ApproximateGradientSumFunction
 from .Function import ScaledFunction
 from .Function import SumScalarFunction
 from .Function import ConstantFunction
@@ -35,3 +35,4 @@
 from .Rosenbrock import Rosenbrock
 from .TotalVariation import TotalVariation
 from .SGFunction import SGFunction
+
diff --git a/Wrappers/Python/test/test_ApproximateGradientSumFunction.py b/Wrappers/Python/test/test_ApproximateGradientSumFunction.py
new file mode 100644
index 0000000000..d74ba29781
--- /dev/null
+++ b/Wrappers/Python/test/test_ApproximateGradientSumFunction.py
@@ -0,0 +1,118 @@
+import unittest
+from utils import initialise_tests
+from cil.optimisation.operators import MatrixOperator
+from cil.optimisation.functions import LeastSquares, ApproximateGradientSumFunction
+from cil.optimisation.utilities import FunctionNumberGenerator
+from cil.framework import VectorData
+import numpy as np
+
+
+initialise_tests()
+
+class TestApproximateGradientSumFunction(unittest.TestCase):
+
+    def setUp(self):
+
+        np.random.seed(10)
+        n = 50
+        m = 500
+        self.n_subsets = 10
+
+        Anp = np.random.uniform(0,1, (m, n)).astype('float32')
+        xnp = np.random.uniform(0,1, (n,)).astype('float32')
+        bnp = Anp.dot(xnp)
+
+        Ai = np.vsplit(Anp, self.n_subsets)
+        bi = [bnp[i:i+n] for i in range(0, m, n)]
+
+        self.Aop = MatrixOperator(Anp)
+        self.bop = VectorData(bnp)
+        ig = self.Aop.domain
+        self.x_cil = ig.allocate('random')
+
+        self.fi_cil = []
+        for i in range(self.n_subsets):
+            Ai_cil = MatrixOperator(Ai[i])
+            bi_cil = VectorData(bi[i])
+            self.fi_cil.append(LeastSquares(Ai_cil, bi_cil, c=1.0))
+
+        self.f = LeastSquares(self.Aop, b=self.bop, c=1.0)
+        generator = FunctionNumberGenerator(self.n_subsets)
+        self.f_subset_sum_function = ApproximateGradientSumFunction(self.fi_cil, generator) # default, with replacement
+
+        generator = FunctionNumberGenerator(self.n_subsets, sampling_method="random_permutation")
+        self.f_subset_sum_function_random_shuffle = ApproximateGradientSumFunction(self.fi_cil, generator)
+
+        generator = FunctionNumberGenerator(self.n_subsets, sampling_method="fixed_permutation")
+        self.f_subset_sum_function_single_shuffle = ApproximateGradientSumFunction(self.fi_cil, generator)
+
+    def test_call_method(self):
+
+        res1 = self.f(self.x_cil)
+        res2 = self.f_subset_sum_function(self.x_cil)
+        np.testing.assert_allclose(res1, res2)
+
+    def test_full_gradient(self):
+
+        res1 = self.f.gradient(self.x_cil)
+        res2 = self.f_subset_sum_function.full_gradient(self.x_cil)
+        np.testing.assert_allclose(res1.array, res2.array, atol=1e-3)
+
+    def test_sampling_random_with_replacement(self):
+
+        # check random selection with replacement
+        epochs = 3
+        choices = []
+        for i in range(epochs):
+            for j in range(self.n_subsets):
+                self.f_subset_sum_function.next_function_num()
+                choices.append(self.f_subset_sum_function.function_num)
+        self.assertTrue(choices == self.f_subset_sum_function.functions_used)
+
+    def test_sampling_random_without_replacement_random_shuffle(self):
+
+        # check random selection without replacement
+        epochs = 3
+        choices = []
+        for i in range(epochs):
+            for j in range(self.n_subsets):
+                self.f_subset_sum_function_random_shuffle.next_function_num()
+                choices.append(self.f_subset_sum_function_random_shuffle.function_num)
+        self.assertTrue(choices == self.f_subset_sum_function_random_shuffle.functions_used)
+
+    def test_sampling_random_without_replacement_single_shuffle(self):
+
+        # check random selection without replacement
+        epochs = 3
+        choices = []
+        for i in range(epochs):
+            for j in range(self.n_subsets):
+                self.f_subset_sum_function_single_shuffle.next_function_num()
+                choices.append(self.f_subset_sum_function_single_shuffle.function_num)
+        self.assertTrue(choices == self.f_subset_sum_function_single_shuffle.functions_used)
\ No newline at end of file
diff --git a/Wrappers/Python/test/test_SGFunction.py b/Wrappers/Python/test/test_SGFunction.py
index 1ac463178b..4222a39c28 100644
--- a/Wrappers/Python/test/test_SGFunction.py
+++ b/Wrappers/Python/test/test_SGFunction.py
@@ -1,7 +1,8 @@
 import unittest
 from utils import initialise_tests
 from cil.optimisation.operators import MatrixOperator
-from cil.optimisation.functions import LeastSquares, SubsetSumFunction, SGFunction
+from cil.optimisation.functions import LeastSquares, SGFunction
+from cil.optimisation.utilities import FunctionNumberGenerator
 from cil.optimisation.algorithms import GD
 from cil.framework import VectorData
 import numpy as np
@@ -46,7 +47,9 @@ def setUp(self):
 
         self.F = LeastSquares(self.Aop, b=self.bop, c = 0.5)
         self.ig = self.Aop.domain
-        self.F_SG = SGFunction(self.fi_cil, replacement = True)
+
+        generator = FunctionNumberGenerator(self.n_subsets)
+        self.F_SG = SGFunction(self.fi_cil, generator)
 
         self.initial = self.ig.allocate()
 
@@ -59,7 +62,7 @@ def test_gradient(self):
 
         self.F_SG.gradient(x, out=out1)
 
-        self.F_SG[self.F_SG.subset_num].gradient(x, out=out2)
+        self.F_SG[self.F_SG.function_num].gradient(x, out=out2)
         out2*=self.n_subsets
 
         np.testing.assert_allclose(out1.array, out2.array, atol=1e-4)
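
Before the packaging changes that follow, a minimal end-to-end sketch of the API this series arrives at, mirroring the tests above; the problem sizes, step size and iteration counts are illustrative assumptions:

    import numpy as np
    from cil.framework import VectorData
    from cil.optimisation.operators import MatrixOperator
    from cil.optimisation.functions import LeastSquares, SGFunction
    from cil.optimisation.utilities import FunctionNumberGenerator
    from cil.optimisation.algorithms import GD

    np.random.seed(10)
    n, m, n_subsets = 50, 500, 10

    Anp = np.random.uniform(0, 1, (m, n)).astype('float32')
    bnp = Anp.dot(np.random.uniform(0, 1, (n,)).astype('float32'))

    # one LeastSquares term per block of rows
    rows = m // n_subsets
    fi = [LeastSquares(MatrixOperator(Anp[i:i + rows]), VectorData(bnp[i:i + rows]), c=1.0)
          for i in range(0, m, rows)]

    # sample the terms without replacement, reshuffling after every epoch
    generator = FunctionNumberGenerator(n_subsets, sampling_method="random_permutation")
    F = SGFunction(fi, generator)

    ig = MatrixOperator(Anp).domain
    sgd = GD(initial=ig.allocate(), objective_function=F, step_size=1e-4,
             max_iteration=20 * n_subsets, update_objective_interval=n_subsets)
    sgd.run(verbose=0)
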

From b1b99b9654837f9599c6cb673b72b47862ab48cf Mon Sep 17 00:00:00 2001
From: epapoutsellis
Date: Fri, 18 Nov 2022 15:44:15 +0000
Subject: [PATCH 16/19] add optimisation utilities

---
 Wrappers/Python/setup.py | 59 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 Wrappers/Python/setup.py

diff --git a/Wrappers/Python/setup.py b/Wrappers/Python/setup.py
new file mode 100644
index 0000000000..3ad2337a15
--- /dev/null
+++ b/Wrappers/Python/setup.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+#  This work is part of the Core Imaging Library (CIL) developed by CCPi
+#  (Collaborative Computational Project in Tomographic Imaging), with
+#  substantial contributions by UKRI-STFC and University of Manchester.
+
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+
+#  http://www.apache.org/licenses/LICENSE-2.0
+
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from setuptools import setup
+import os
+import sys
+
+
+cil_version = '21.2.0'
+if '141' != '-1':
+    cil_version += '.dev141'
+
+
+setup(
+    name="cil",
+    version=cil_version,
+    packages=['cil', 'cil.io',
+              'cil.framework', 'cil.optimisation',
+              'cil.optimisation.functions',
+              'cil.optimisation.algorithms',
+              'cil.optimisation.operators',
+              'cil.optimisation.utilities',
+              'cil.processors',
+              'cil.utilities', 'cil.utilities.jupyter',
+              'cil.plugins',
+              'cil.plugins.ccpi_regularisation',
+              'cil.plugins.ccpi_regularisation.functions',
+              'cil.plugins.tigre',
+              'cil.plugins.astra',
+              'cil.plugins.astra.operators',
+              'cil.plugins.astra.processors',
+              'cil.plugins.astra.utilities',
+              'cil.recon'],
+
+    # metadata for upload to PyPI
+    author="CCPi developers",
+    maintainer="Edoardo Pasca",
+    maintainer_email="edoardo.pasca@stfc.ac.uk",
+    description='CCPi Core Imaging Library',
+    license="Apache v2.0",
+    keywords="Python Framework",
+    url="http://www.ccpi.ac.uk/cil",
+)

From 50aad5d1db2086d4a8a3e36bddd8dd143ab1cf21 Mon Sep 17 00:00:00 2001
From: epapoutsellis
Date: Fri, 18 Nov 2022 17:58:09 +0000
Subject: [PATCH 17/19] update setup.py

---
 Wrappers/Python/setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Wrappers/Python/setup.py b/Wrappers/Python/setup.py
index 3ad2337a15..799164fcff 100644
--- a/Wrappers/Python/setup.py
+++ b/Wrappers/Python/setup.py
@@ -34,7 +34,6 @@
               'cil.optimisation.functions',
               'cil.optimisation.algorithms',
               'cil.optimisation.operators',
-              'cil.optimisation.utilities',
               'cil.processors',
               'cil.utilities', 'cil.utilities.jupyter',
               'cil.plugins',

From fd2c75e0e6a6ce00cabbe6314f13b9957e69a437 Mon Sep 17 00:00:00 2001
From: epapoutsellis
Date: Fri, 18 Nov 2022 17:58:37 +0000
Subject: [PATCH 18/19] update setup.py

---
 Wrappers/Python/setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Wrappers/Python/setup.py b/Wrappers/Python/setup.py
index 799164fcff..2a9172a36c 100644
--- a/Wrappers/Python/setup.py
+++ b/Wrappers/Python/setup.py
@@ -34,6 +34,7 @@
               'cil.optimisation.functions',
               'cil.optimisation.algorithms',
               'cil.optimisation.operators',
+              'cil.optimisation.utilities',
               'cil.processors',
               'cil.utilities', 'cil.utilities.jupyter',
               'cil.plugins',

From a81b01f59d786cf0c7163f65aa39482569eb379c Mon Sep 17 00:00:00 2001
From: epapoutsellis
Date: Fri, 18 Nov 2022 18:25:00 +0000
Subject: [PATCH 19/19] update setup.py

---
 Wrappers/Python/CMake/setup.py.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Wrappers/Python/CMake/setup.py.in b/Wrappers/Python/CMake/setup.py.in
index 2f5adaad88..edc93af486 100644
--- a/Wrappers/Python/CMake/setup.py.in
+++ b/Wrappers/Python/CMake/setup.py.in
@@ -34,6 +34,7 @@ setup(
               'cil.optimisation.functions',
               'cil.optimisation.algorithms',
               'cil.optimisation.operators',
+              'cil.optimisation.utilities',
               'cil.processors',
               'cil.utilities', 'cil.utilities.jupyter',
               'cil.plugins',