Commit a336848: #185 add float types #2 - Post initial review

PhilipDeegan committed May 22, 2018
1 parent cfeae7c commit a336848
Showing 53 changed files with 372 additions and 400 deletions.
3 changes: 3 additions & 0 deletions lib/cpp-test/serialization/CMakeLists.txt
@@ -1,3 +1,6 @@
+# This test is quite long and can cause issues so
+## it is not included in the normal set of gtests
+
 add_executable(tick_test_serialization solver.cpp)
 
 target_link_libraries(tick_test_serialization
5 changes: 4 additions & 1 deletion tick/__init__.py
@@ -1,3 +1,6 @@
+# -*- coding: utf-8 -*-
+"""tick module
+"""
 # License: BSD 3 clause
 
-import tick.base
+import tick.base
4 changes: 2 additions & 2 deletions tick/base/base.py
@@ -560,6 +560,6 @@ def _inc_attr(self, key: str, step: int = 1):
 
     def __str__(self):
         dic = self._as_dict()
-        if isinstance(dic['dtype'], np.dtype):
-            dic['dtype'] = dic['dtype'].name
+        if 'dtype' in dic and isinstance(dic['dtype'], np.dtype):
+            dic['dtype'] = dic['dtype'].name
         return json.dumps(dic, sort_keys=True, indent=2)
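
An aside on this hunk: json.dumps cannot serialize a raw numpy dtype object, and the added `'dtype' in dic` guard presumably covers objects whose _as_dict() carries no dtype entry at all. A minimal standalone sketch of the behaviour (illustration only, not tick code):

    import json
    import numpy as np

    dic = {"name": "ModelLinReg", "dtype": np.dtype("float32")}

    # json.dumps raises TypeError on a np.dtype value, so replace it with
    # its string name first, and only when the key is actually present
    if "dtype" in dic and isinstance(dic["dtype"], np.dtype):
        dic["dtype"] = dic["dtype"].name  # 'float32'

    print(json.dumps(dic, sort_keys=True, indent=2))
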
4 changes: 2 additions & 2 deletions tick/base/simulation/simu_with_features.py
@@ -64,8 +64,8 @@ class SimuWithFeatures(Simu):
     verbose : `bool`
         If True, print things
 
-    dtype : `{'float64', 'float32'}`
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
         Used in the case features is None
     """
4 changes: 2 additions & 2 deletions tick/base_model/model.py
@@ -35,8 +35,8 @@ class Model(ABC, Base):
     n_passes_over_data : `int` (read-only)
         Number of effective passes through the data
 
-    dtype : `{'float64', 'float32'}`
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     Notes
     -----
10 changes: 5 additions & 5 deletions tick/base_model/model_labels_features.py
@@ -13,8 +13,8 @@ class ModelLabelsFeatures(Model):
     Parameters
     ----------
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     Attributes
     ----------

@@ -59,7 +59,7 @@ def __init__(self):
         self.n_features = None
         self.n_samples = None
 
-    def check_set_dtype(self, features: np.ndarray, labels: np.ndarray):
+    def _check_set_dtype(self, features: np.ndarray, labels: np.ndarray):
         self.dtype = features.dtype
         if self.dtype != labels.dtype:
             raise ValueError("Features and labels differ in data types")

@@ -81,12 +81,12 @@ def fit(self, features: np.ndarray, labels: np.ndarray) -> Model:
             The current instance with given data
         """
         # The fit from Model calls the _set_data below
-        self.check_set_dtype(features, labels)
+        self._check_set_dtype(features, labels)
         return Model.fit(self, features, labels)
 
     def _set_data(self, features, labels):
         n_samples, n_features = features.shape
-        self.check_set_dtype(features, labels)
+        self._check_set_dtype(features, labels)
         if n_samples != labels.shape[0]:
             raise ValueError(("Features has %i samples while labels "
                               "have %i" % (n_samples, labels.shape[0])))
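
The leading underscore marks the dtype check as internal API; the check itself makes the model adopt the features dtype and insists that labels match it. A free-standing sketch of that contract (hypothetical helper, numpy only):

    import numpy as np

    def _check_set_dtype(features: np.ndarray, labels: np.ndarray) -> np.dtype:
        # the model adopts the dtype of the features array ...
        dtype = features.dtype
        # ... and the labels array must use the same one
        if dtype != labels.dtype:
            raise ValueError("Features and labels differ in data types")
        return dtype

    X = np.zeros((5, 3), dtype=np.float32)
    y = np.zeros(5, dtype=np.float64)
    # _check_set_dtype(X, y)  # would raise: float32 features, float64 labels
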
4 changes: 2 additions & 2 deletions tick/base_model/model_lipschitz.py
@@ -12,8 +12,8 @@ class ModelLipschitz(Model):
     Parameters
     ----------
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     Notes
     -----
8 changes: 3 additions & 5 deletions tick/base_model/model_second_order.py
@@ -13,16 +13,14 @@ class ModelSecondOrder(ModelFirstOrder):
     order and second information, namely gradient and hessian norm
     information
 
-    Parameters
-    ----------
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     n_calls_hessian_norm : `int` (read-only)
         Number of times ``hessian_norm`` has been called so far
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     Notes
     -----
     This class should be not used by end-users, it is intended for
6 changes: 3 additions & 3 deletions tick/base_model/model_self_concordant.py
@@ -10,10 +10,10 @@ class ModelSelfConcordant(Model):
     """An abstract base class for a model that implements the
     self-concordant constant
 
-    Parameters
+    Attributes
     ----------
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     Notes
     -----
2 changes: 2 additions & 0 deletions tick/hawkes/model/model_hawkes_expkern_loglik.py
@@ -83,6 +83,8 @@ class ModelHawkesExpKernLogLik(ModelHawkes, ModelSecondOrder,
     def __init__(self, decay: float, n_threads: int = 1):
         ModelSecondOrder.__init__(self)
         ModelSelfConcordant.__init__(self)
+        # Calling "ModelHawkes.__init__" is necessary so that
+        ## dtype is correctly set
        ModelHawkes.__init__(self, n_threads=1, approx=0)
         self.decay = decay
         self._model = _ModelHawkesExpKernLogLik(decay, n_threads)
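
The new comment concerns initializer ordering under multiple inheritance: the parent constructors are invoked explicitly here, so the one that actually initializes dtype, ModelHawkes.__init__, must not be skipped. A toy illustration of the pitfall (class names hypothetical, not tick classes):

    class First:
        def __init__(self):
            self.dtype = None  # leaves dtype unset

    class Second:
        def __init__(self):
            self.dtype = "float64"  # the parent that really sets dtype

    class Combined(First, Second):
        def __init__(self):
            First.__init__(self)
            Second.__init__(self)  # skipping this call leaves dtype as None

    print(Combined().dtype)  # float64
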
6 changes: 3 additions & 3 deletions tick/linear_model/model_hinge.py
@@ -39,9 +39,6 @@ class ModelHinge(ModelFirstOrder, ModelGeneralizedLinear):
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     features : {`numpy.ndarray`, `scipy.sparse.csr_matrix`}, shape=(n_samples, n_features)

@@ -59,6 +56,9 @@ class ModelHinge(ModelFirstOrder, ModelGeneralizedLinear):
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.
21 changes: 12 additions & 9 deletions tick/linear_model/model_linreg.py
@@ -45,9 +45,6 @@ class ModelLinReg(ModelFirstOrder, ModelGeneralizedLinear, ModelLipschitz):
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     features : {`numpy.ndarray`, `scipy.sparse.csr_matrix`}, shape=(n_samples, n_features)

@@ -65,6 +62,9 @@ class ModelLinReg(ModelFirstOrder, ModelGeneralizedLinear, ModelLipschitz):
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.

@@ -81,6 +81,13 @@ def __init__(self, fit_intercept: bool = True, n_threads: int = 1):
 
     # TODO: implement _set_data and not fit
 
+    @property
+    def _model_class(self):
+        if self.dtype not in dtype_map:
+            raise ValueError('dtype provided to ModelLinReg is not handled: {}'.format(self.dtype))
+        return dtype_map[np.dtype(self.dtype)]
+
+
     def fit(self, features, labels):
         """Set the data into the model object

@@ -101,11 +108,7 @@ def fit(self, features, labels):
         ModelGeneralizedLinear.fit(self, features, labels)
         ModelLipschitz.fit(self, features, labels)
 
-        if self.dtype not in dtype_map:
-            raise ValueError('dtype provided to ModelLinReg is not handled: ',
-                             self.dtype)
-
-        self._set("_model", dtype_map[np.dtype(self.dtype)](
+        self._set("_model", self._model_class(
             self.features, self.labels, self.fit_intercept, self.n_threads))
 
         return self

@@ -115,7 +118,7 @@ def _grad(self, coeffs: np.ndarray, out: np.ndarray) -> None:
 
     def _loss(self, coeffs: np.ndarray) -> float:
         if self.dtype is not "float64" and coeffs.dtype is np.float64:
-            coeffs = coeffs.astype(self.dtype)
+            raise ValueError("Model Linreg has received coeffs array with unexpected dtype")
         return self._model.loss(coeffs)
 
     def _get_lip_best(self):
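
Factoring the dtype_map lookup into a _model_class property removes the duplicated validation from fit() and leaves a single place that resolves a dtype to the matching C++ backend. A rough sketch of the pattern, with stand-in classes where the generated ModelLinReg extension wrappers would be:

    import numpy as np

    class _ModelLinRegDouble:  # stand-in for the float64 C++ wrapper
        def __init__(self, *args): ...

    class _ModelLinRegFloat:  # stand-in for the float32 C++ wrapper
        def __init__(self, *args): ...

    dtype_map = {
        np.dtype("float64"): _ModelLinRegDouble,
        np.dtype("float32"): _ModelLinRegFloat,
    }

    def model_class_for(dtype):
        # mirrors the _model_class property above, normalizing first so
        # both strings ('float32') and np.dtype objects are accepted
        dt = np.dtype(dtype)
        if dt not in dtype_map:
            raise ValueError(
                "dtype provided to ModelLinReg is not handled: {}".format(dtype))
        return dtype_map[dt]

    print(model_class_for("float32").__name__)  # _ModelLinRegFloat
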
22 changes: 12 additions & 10 deletions tick/linear_model/model_logreg.py
@@ -44,9 +44,6 @@ class ModelLogReg(ModelFirstOrder, ModelGeneralizedLinear, ModelLipschitz):
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     features : {`numpy.ndarray`, `scipy.sparse.csr_matrix`}, shape=(n_samples, n_features)

@@ -64,6 +61,9 @@ class ModelLogReg(ModelFirstOrder, ModelGeneralizedLinear, ModelLipschitz):
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.

@@ -78,6 +78,12 @@ def __init__(self, fit_intercept: bool = True, n_threads: int = 1):
         ModelLipschitz.__init__(self)
         self.n_threads = n_threads
 
+    @property
+    def _model_class(self):
+        if self.dtype not in dtype_map:
+            raise ValueError('dtype provided to ModelLogReg is not handled: {}'.format(self.dtype))
+        return dtype_map[np.dtype(self.dtype)]
+
     # TODO: implement _set_data and not fit
     def fit(self, features, labels):
         """Set the data into the model object

@@ -99,11 +105,7 @@ def fit(self, features, labels):
         ModelGeneralizedLinear.fit(self, features, labels)
         ModelLipschitz.fit(self, features, labels)
 
-        if self.dtype not in dtype_map:
-            raise ValueError('dtype provided to ModelLogReg is not handled: ',
-                             self.dtype)
-
-        self._set("_model", dtype_map[np.dtype(self.dtype)](
+        self._set("_model", self._model_class(
             self.features, self.labels, self.fit_intercept, self.n_threads))
         return self

@@ -133,8 +135,8 @@ def sigmoid(coeffs: np.ndarray, out: np.ndarray = None) -> np.ndarray:
         """
         if out is None:
             out = np.empty(coeffs.shape[0], dtype=coeffs.dtype)
-        # this following line requires "np.dtype('floatxx')
-        # for reasons unknown
+        # sigmoid is a templated static function so
+        ## we must call the right version for the right dtype
         dtype_map[coeffs.dtype].sigmoid(coeffs, out)
         return out
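
The rewritten comment is the real explanation: the C++ sigmoid exists once per floating-point type, so the Python wrapper must pick the instantiation that matches coeffs.dtype. A pure-numpy sketch of the same dispatch (helper functions hypothetical):

    import numpy as np

    def _sigmoid64(coeffs, out):
        np.divide(1.0, 1.0 + np.exp(-coeffs), out=out)

    def _sigmoid32(coeffs, out):
        one = np.float32(1.0)  # keep the arithmetic in float32
        np.divide(one, one + np.exp(-coeffs), out=out)

    # analogue of dtype_map: one entry per compiled instantiation
    sigmoid_map = {np.dtype("float64"): _sigmoid64,
                   np.dtype("float32"): _sigmoid32}

    coeffs = np.array([-1.0, 0.0, 1.0], dtype=np.float32)
    out = np.empty(coeffs.shape[0], dtype=coeffs.dtype)
    sigmoid_map[coeffs.dtype](coeffs, out)  # fills out, stays float32
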
4 changes: 2 additions & 2 deletions tick/linear_model/model_poisreg.py
@@ -55,8 +55,8 @@ class ModelPoisReg(ModelGeneralizedLinear, ModelSecondOrder,
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     link : `str`, default="exponential"
         Type of link function
6 changes: 3 additions & 3 deletions tick/linear_model/model_quadratic_hinge.py
@@ -42,9 +42,6 @@ class ModelQuadraticHinge(ModelFirstOrder, ModelGeneralizedLinear,
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     features : {`numpy.ndarray`, `scipy.sparse.csr_matrix`}, shape=(n_samples, n_features)

@@ -62,6 +59,9 @@ class ModelQuadraticHinge(ModelFirstOrder, ModelGeneralizedLinear,
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.
6 changes: 3 additions & 3 deletions tick/linear_model/model_smoothed_hinge.py
@@ -45,9 +45,6 @@ class ModelSmoothedHinge(ModelFirstOrder, ModelGeneralizedLinear,
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     smoothness : `double`, default=1.
         The smoothness parameter used in the loss. It should be > 0 and <= 1
         Note that smoothness=0 corresponds to the Hinge loss.

@@ -69,6 +66,9 @@ class ModelSmoothedHinge(ModelFirstOrder, ModelGeneralizedLinear,
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.
12 changes: 8 additions & 4 deletions tick/linear_model/simu_linreg.py
@@ -38,8 +38,8 @@ class SimuLinReg(SimuWithFeatures):
     cov_corr : `float`, default=.5
         Correlation to use in the Toeplitz correlation matrix
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     features_scaling : `str`, default="none"
         The way the features matrix is scaled after simulation

@@ -61,6 +61,9 @@ class SimuLinReg(SimuWithFeatures):
     verbose : `bool`, default=True
         If `True`, print things
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     Attributes
     ----------
     features : `numpy.ndarray`, shape=(n_samples, n_features)

@@ -78,8 +81,8 @@ class SimuLinReg(SimuWithFeatures):
     time_end : `str`
         End date of the simulation
 
-    dtype : `{'float64', 'float32'}`
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
         Used in the case features is None
     """

@@ -123,6 +126,7 @@ def _simulate(self):
         if self.intercept is not None:
             u += self.intercept
         labels = u + self.std * np.random.randn(n_samples)
+        # "astype" must be used for labels as it is always float64
         if self.dtype != np.float64:
             labels = labels.astype(self.dtype)
         self._set("labels", labels)
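
The added comment deserves a gloss: np.random.randn always returns float64, so the noisy labels get promoted to float64 even when u is float32, and only an explicit cast restores the requested dtype. A quick check of that promotion:

    import numpy as np

    u = np.zeros(4, dtype=np.float32)
    labels = u + 1.0 * np.random.randn(4)
    print(labels.dtype)  # float64: randn promoted the result

    labels = labels.astype(np.float32)
    print(labels.dtype)  # float32
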
10 changes: 4 additions & 6 deletions tick/linear_model/simu_logreg.py
@@ -55,6 +55,9 @@ class SimuLogReg(SimuWithFeatures):
     verbose : `bool`, default=True
         If `True`, print things
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     Attributes
     ----------
     features : `numpy.ndarray`, shape=(n_samples, n_features)

@@ -72,9 +75,6 @@ class SimuLogReg(SimuWithFeatures):
     time_end : `str`
         End date of the simulation
 
-    dtype : `{'float64', 'float32'}`
-        Type of arrays to use - default float64
-        Used in the case features is None
     """
 
     _attrinfos = {"labels": {"writable": False}}

@@ -126,10 +126,8 @@ def _simulate(self):
             u += self.intercept
         p = np.empty(n_samples)
         p[:] = SimuLogReg.sigmoid(u)
-        labels = np.empty(n_samples)
+        labels = np.empty(n_samples, dtype=self.dtype)
         labels[:] = np.random.binomial(1, p, size=n_samples)
         labels[labels == 0] = -1
-        if self.dtype != np.float64:
-            labels = labels.astype(self.dtype)
         self._set("labels", labels)
         return features, labels
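
Unlike SimuLinReg above, labels is allocated explicitly here, so it can be created with the requested dtype from the start; the integer binomial draws are cast on assignment and the old float64-then-astype round trip disappears. A quick check of that behaviour:

    import numpy as np

    p = np.full(4, 0.5)
    labels = np.empty(4, dtype=np.float32)
    labels[:] = np.random.binomial(1, p, size=4)  # ints cast to float32 here
    labels[labels == 0] = -1
    print(labels.dtype)  # float32, no astype needed
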