Commit a336848: #185 add float types #2 - Post initial review

PhilipDeegan committed May 22, 2018
1 parent cfeae7c commit a336848
Showing 53 changed files with 372 additions and 400 deletions.
3 changes: 3 additions & 0 deletions lib/cpp-test/serialization/CMakeLists.txt
@@ -1,3 +1,6 @@
+# This test is quite long and can cause issues so
+## it is not included in the normal set of gtests
+
 add_executable(tick_test_serialization solver.cpp)
 
 target_link_libraries(tick_test_serialization
5 changes: 4 additions & 1 deletion tick/__init__.py
@@ -1,3 +1,6 @@
+# -*- coding: utf-8 -*-
+"""tick module
+"""
 # License: BSD 3 clause
 
-import tick.base
+import tick.base
4 changes: 2 additions & 2 deletions tick/base/base.py
@@ -560,6 +560,6 @@ def _inc_attr(self, key: str, step: int = 1):
 
     def __str__(self):
         dic = self._as_dict()
-        if isinstance(dic['dtype'], np.dtype):
-            dic['dtype'] = dic['dtype'].name
+        if 'dtype' in dic and isinstance(dic['dtype'], np.dtype):
+            dic['dtype'] = dic['dtype'].name
         return json.dumps(dic, sort_keys=True, indent=2)
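
An aside on this hunk: json.dumps cannot serialize a raw numpy dtype object, and the added `'dtype' in dic` guard presumably covers objects whose _as_dict() carries no dtype entry at all. A minimal standalone sketch of the behaviour (illustration only, not tick code):

    import json
    import numpy as np

    dic = {"name": "ModelLinReg", "dtype": np.dtype("float32")}

    # json.dumps raises TypeError on a np.dtype value, so replace it with
    # its string name first, and only when the key is actually present
    if "dtype" in dic and isinstance(dic["dtype"], np.dtype):
        dic["dtype"] = dic["dtype"].name  # 'float32'

    print(json.dumps(dic, sort_keys=True, indent=2))
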
4 changes: 2 additions & 2 deletions tick/base/simulation/simu_with_features.py
@@ -64,8 +64,8 @@ class SimuWithFeatures(Simu):
     verbose : `bool`
         If True, print things
 
-    dtype : `{'float64', 'float32'}`
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
         Used in the case features is None
     """
4 changes: 2 additions & 2 deletions tick/base_model/model.py
@@ -35,8 +35,8 @@ class Model(ABC, Base):
     n_passes_over_data : `int` (read-only)
         Number of effective passes through the data
 
-    dtype : `{'float64', 'float32'}`
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     Notes
     -----
10 changes: 5 additions & 5 deletions tick/base_model/model_labels_features.py
@@ -13,8 +13,8 @@ class ModelLabelsFeatures(Model):
     Parameters
     ----------
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     Attributes
     ----------

@@ -59,7 +59,7 @@ def __init__(self):
         self.n_features = None
         self.n_samples = None
 
-    def check_set_dtype(self, features: np.ndarray, labels: np.ndarray):
+    def _check_set_dtype(self, features: np.ndarray, labels: np.ndarray):
         self.dtype = features.dtype
         if self.dtype != labels.dtype:
             raise ValueError("Features and labels differ in data types")

@@ -81,12 +81,12 @@ def fit(self, features: np.ndarray, labels: np.ndarray) -> Model:
             The current instance with given data
         """
         # The fit from Model calls the _set_data below
-        self.check_set_dtype(features, labels)
+        self._check_set_dtype(features, labels)
         return Model.fit(self, features, labels)
 
     def _set_data(self, features, labels):
         n_samples, n_features = features.shape
-        self.check_set_dtype(features, labels)
+        self._check_set_dtype(features, labels)
         if n_samples != labels.shape[0]:
             raise ValueError(("Features has %i samples while labels "
                               "have %i" % (n_samples, labels.shape[0])))
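
The leading underscore marks the dtype check as internal API; the check itself makes the model adopt the features dtype and insists that labels match it. A free-standing sketch of that contract (hypothetical helper, numpy only):

    import numpy as np

    def _check_set_dtype(features: np.ndarray, labels: np.ndarray) -> np.dtype:
        # the model adopts the dtype of the features array ...
        dtype = features.dtype
        # ... and the labels array must use the same one
        if dtype != labels.dtype:
            raise ValueError("Features and labels differ in data types")
        return dtype

    X = np.zeros((5, 3), dtype=np.float32)
    y = np.zeros(5, dtype=np.float64)
    # _check_set_dtype(X, y)  # would raise: float32 features, float64 labels
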
4 changes: 2 additions & 2 deletions tick/base_model/model_lipschitz.py
@@ -12,8 +12,8 @@ class ModelLipschitz(Model):
     Parameters
     ----------
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     Notes
     -----
8 changes: 3 additions & 5 deletions tick/base_model/model_second_order.py
@@ -13,16 +13,14 @@ class ModelSecondOrder(ModelFirstOrder):
     order and second information, namely gradient and hessian norm
     information
 
-    Parameters
-    ----------
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     n_calls_hessian_norm : `int` (read-only)
         Number of times ``hessian_norm`` has been called so far
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     Notes
     -----
     This class should be not used by end-users, it is intended for
6 changes: 3 additions & 3 deletions tick/base_model/model_self_concordant.py
@@ -10,10 +10,10 @@ class ModelSelfConcordant(Model):
     """An abstract base class for a model that implements the
     self-concordant constant
 
-    Parameters
+    Attributes
     ----------
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     Notes
     -----
2 changes: 2 additions & 0 deletions tick/hawkes/model/model_hawkes_expkern_loglik.py
@@ -83,6 +83,8 @@ class ModelHawkesExpKernLogLik(ModelHawkes, ModelSecondOrder,
     def __init__(self, decay: float, n_threads: int = 1):
         ModelSecondOrder.__init__(self)
         ModelSelfConcordant.__init__(self)
+        # Calling "ModelHawkes.__init__" is necessary so that
+        ## dtype is correctly set
        ModelHawkes.__init__(self, n_threads=1, approx=0)
         self.decay = decay
         self._model = _ModelHawkesExpKernLogLik(decay, n_threads)
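
The new comment concerns initializer ordering under multiple inheritance: the parent constructors are invoked explicitly here, so the one that actually initializes dtype, ModelHawkes.__init__, must not be skipped. A toy illustration of the pitfall (class names hypothetical, not tick classes):

    class First:
        def __init__(self):
            self.dtype = None  # leaves dtype unset

    class Second:
        def __init__(self):
            self.dtype = "float64"  # the parent that really sets dtype

    class Combined(First, Second):
        def __init__(self):
            First.__init__(self)
            Second.__init__(self)  # skipping this call leaves dtype as None

    print(Combined().dtype)  # float64
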
6 changes: 3 additions & 3 deletions tick/linear_model/model_hinge.py
@@ -39,9 +39,6 @@ class ModelHinge(ModelFirstOrder, ModelGeneralizedLinear):
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     features : {`numpy.ndarray`, `scipy.sparse.csr_matrix`}, shape=(n_samples, n_features)

@@ -59,6 +56,9 @@ class ModelHinge(ModelFirstOrder, ModelGeneralizedLinear):
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.
21 changes: 12 additions & 9 deletions tick/linear_model/model_linreg.py
@@ -45,9 +45,6 @@ class ModelLinReg(ModelFirstOrder, ModelGeneralizedLinear, ModelLipschitz):
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     features : {`numpy.ndarray`, `scipy.sparse.csr_matrix`}, shape=(n_samples, n_features)

@@ -65,6 +62,9 @@ class ModelLinReg(ModelFirstOrder, ModelGeneralizedLinear, ModelLipschitz):
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.

@@ -81,6 +81,13 @@ def __init__(self, fit_intercept: bool = True, n_threads: int = 1):
 
     # TODO: implement _set_data and not fit
 
+    @property
+    def _model_class(self):
+        if self.dtype not in dtype_map:
+            raise ValueError('dtype provided to ModelLinReg is not handled: {}'.format(self.dtype))
+        return dtype_map[np.dtype(self.dtype)]
+
+
     def fit(self, features, labels):
         """Set the data into the model object

@@ -101,11 +108,7 @@ def fit(self, features, labels):
         ModelGeneralizedLinear.fit(self, features, labels)
         ModelLipschitz.fit(self, features, labels)
 
-        if self.dtype not in dtype_map:
-            raise ValueError('dtype provided to ModelLinReg is not handled: ',
-                             self.dtype)
-
-        self._set("_model", dtype_map[np.dtype(self.dtype)](
+        self._set("_model", self._model_class(
             self.features, self.labels, self.fit_intercept, self.n_threads))
 
         return self

@@ -115,7 +118,7 @@ def _grad(self, coeffs: np.ndarray, out: np.ndarray) -> None:
 
     def _loss(self, coeffs: np.ndarray) -> float:
         if self.dtype is not "float64" and coeffs.dtype is np.float64:
-            coeffs = coeffs.astype(self.dtype)
+            raise ValueError("Model Linreg has received coeffs array with unexpected dtype")
         return self._model.loss(coeffs)
 
     def _get_lip_best(self):
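
Factoring the dtype_map lookup into a _model_class property removes the duplicated validation from fit() and leaves a single place that resolves a dtype to the matching C++ backend. A rough sketch of the pattern, with stand-in classes where the generated ModelLinReg extension wrappers would be:

    import numpy as np

    class _ModelLinRegDouble:  # stand-in for the float64 C++ wrapper
        def __init__(self, *args): ...

    class _ModelLinRegFloat:  # stand-in for the float32 C++ wrapper
        def __init__(self, *args): ...

    dtype_map = {
        np.dtype("float64"): _ModelLinRegDouble,
        np.dtype("float32"): _ModelLinRegFloat,
    }

    def model_class_for(dtype):
        # mirrors the _model_class property above, normalizing first so
        # both strings ('float32') and np.dtype objects are accepted
        dt = np.dtype(dtype)
        if dt not in dtype_map:
            raise ValueError(
                "dtype provided to ModelLinReg is not handled: {}".format(dtype))
        return dtype_map[dt]

    print(model_class_for("float32").__name__)  # _ModelLinRegFloat
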
22 changes: 12 additions & 10 deletions tick/linear_model/model_logreg.py
@@ -44,9 +44,6 @@ class ModelLogReg(ModelFirstOrder, ModelGeneralizedLinear, ModelLipschitz):
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     features : {`numpy.ndarray`, `scipy.sparse.csr_matrix`}, shape=(n_samples, n_features)

@@ -64,6 +61,9 @@ class ModelLogReg(ModelFirstOrder, ModelGeneralizedLinear, ModelLipschitz):
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.

@@ -78,6 +78,12 @@ def __init__(self, fit_intercept: bool = True, n_threads: int = 1):
         ModelLipschitz.__init__(self)
         self.n_threads = n_threads
 
+    @property
+    def _model_class(self):
+        if self.dtype not in dtype_map:
+            raise ValueError('dtype provided to ModelLogReg is not handled: {}'.format(self.dtype))
+        return dtype_map[np.dtype(self.dtype)]
+
     # TODO: implement _set_data and not fit
     def fit(self, features, labels):
         """Set the data into the model object

@@ -99,11 +105,7 @@ def fit(self, features, labels):
         ModelGeneralizedLinear.fit(self, features, labels)
         ModelLipschitz.fit(self, features, labels)
 
-        if self.dtype not in dtype_map:
-            raise ValueError('dtype provided to ModelLogReg is not handled: ',
-                             self.dtype)
-
-        self._set("_model", dtype_map[np.dtype(self.dtype)](
+        self._set("_model", self._model_class(
             self.features, self.labels, self.fit_intercept, self.n_threads))
         return self

@@ -133,8 +135,8 @@ def sigmoid(coeffs: np.ndarray, out: np.ndarray = None) -> np.ndarray:
         """
         if out is None:
             out = np.empty(coeffs.shape[0], dtype=coeffs.dtype)
-        # this following line requires "np.dtype('floatxx')
-        # for reasons unknown
+        # sigmoid is a templated static function so
+        ## we must call the right version for the right dtype
         dtype_map[coeffs.dtype].sigmoid(coeffs, out)
         return out
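
The rewritten comment is the real explanation: the C++ sigmoid exists once per floating-point type, so the Python wrapper must pick the instantiation that matches coeffs.dtype. A pure-numpy sketch of the same dispatch (helper functions hypothetical):

    import numpy as np

    def _sigmoid64(coeffs, out):
        np.divide(1.0, 1.0 + np.exp(-coeffs), out=out)

    def _sigmoid32(coeffs, out):
        one = np.float32(1.0)  # keep the arithmetic in float32
        np.divide(one, one + np.exp(-coeffs), out=out)

    # analogue of dtype_map: one entry per compiled instantiation
    sigmoid_map = {np.dtype("float64"): _sigmoid64,
                   np.dtype("float32"): _sigmoid32}

    coeffs = np.array([-1.0, 0.0, 1.0], dtype=np.float32)
    out = np.empty(coeffs.shape[0], dtype=coeffs.dtype)
    sigmoid_map[coeffs.dtype](coeffs, out)  # fills out, stays float32
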
4 changes: 2 additions & 2 deletions tick/linear_model/model_poisreg.py
@@ -55,8 +55,8 @@ class ModelPoisReg(ModelGeneralizedLinear, ModelSecondOrder,
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     link : `str`, default="exponential"
         Type of link function
6 changes: 3 additions & 3 deletions tick/linear_model/model_quadratic_hinge.py
@@ -42,9 +42,6 @@ class ModelQuadraticHinge(ModelFirstOrder, ModelGeneralizedLinear,
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     Attributes
     ----------
     features : {`numpy.ndarray`, `scipy.sparse.csr_matrix`}, shape=(n_samples, n_features)

@@ -62,6 +59,9 @@ class ModelQuadraticHinge(ModelFirstOrder, ModelGeneralizedLinear,
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.
6 changes: 3 additions & 3 deletions tick/linear_model/model_smoothed_hinge.py
@@ -45,9 +45,6 @@ class ModelSmoothedHinge(ModelFirstOrder, ModelGeneralizedLinear,
     fit_intercept : `bool`
         If `True`, the model uses an intercept
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
-
     smoothness : `double`, default=1.
         The smoothness parameter used in the loss. It should be > 0 and <= 1
         Note that smoothness=0 corresponds to the Hinge loss.

@@ -69,6 +66,9 @@ class ModelSmoothedHinge(ModelFirstOrder, ModelGeneralizedLinear,
     n_coeffs : `int` (read-only)
         Total number of coefficients of the model
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     n_threads : `int`, default=1 (read-only)
         Number of threads used for parallel computation.
12 changes: 8 additions & 4 deletions tick/linear_model/simu_linreg.py
@@ -38,8 +38,8 @@ class SimuLinReg(SimuWithFeatures):
     cov_corr : `float`, default=.5
         Correlation to use in the Toeplitz correlation matrix
 
-    dtype : `string`, default='float64'
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
 
     features_scaling : `str`, default="none"
         The way the features matrix is scaled after simulation

@@ -61,6 +61,9 @@ class SimuLinReg(SimuWithFeatures):
     verbose : `bool`, default=True
         If `True`, print things
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     Attributes
     ----------
     features : `numpy.ndarray`, shape=(n_samples, n_features)

@@ -78,8 +81,8 @@ class SimuLinReg(SimuWithFeatures):
     time_end : `str`
         End date of the simulation
 
-    dtype : `{'float64', 'float32'}`
-        Type of arrays to use - default float64
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
         Used in the case features is None
     """

@@ -123,6 +126,7 @@ def _simulate(self):
         if self.intercept is not None:
             u += self.intercept
         labels = u + self.std * np.random.randn(n_samples)
+        # "astype" must be used for labels as it is always float64
         if self.dtype != np.float64:
             labels = labels.astype(self.dtype)
         self._set("labels", labels)
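
The added comment deserves a gloss: np.random.randn always returns float64, so the noisy labels get promoted to float64 even when u is float32, and only an explicit cast restores the requested dtype. A quick check of that promotion:

    import numpy as np

    u = np.zeros(4, dtype=np.float32)
    labels = u + 1.0 * np.random.randn(4)
    print(labels.dtype)  # float64: randn promoted the result

    labels = labels.astype(np.float32)
    print(labels.dtype)  # float32
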
10 changes: 4 additions & 6 deletions tick/linear_model/simu_logreg.py
@@ -55,6 +55,9 @@ class SimuLogReg(SimuWithFeatures):
     verbose : `bool`, default=True
         If `True`, print things
 
+    dtype : `{'float64', 'float32'}`, default='float64'
+        Type of the arrays used. This value is set from model and prox dtypes.
+
     Attributes
     ----------
     features : `numpy.ndarray`, shape=(n_samples, n_features)

@@ -72,9 +75,6 @@ class SimuLogReg(SimuWithFeatures):
     time_end : `str`
         End date of the simulation
 
-    dtype : `{'float64', 'float32'}`
-        Type of arrays to use - default float64
-        Used in the case features is None
     """
 
     _attrinfos = {"labels": {"writable": False}}

@@ -126,10 +126,8 @@ def _simulate(self):
             u += self.intercept
         p = np.empty(n_samples)
         p[:] = SimuLogReg.sigmoid(u)
-        labels = np.empty(n_samples)
+        labels = np.empty(n_samples, dtype=self.dtype)
         labels[:] = np.random.binomial(1, p, size=n_samples)
         labels[labels == 0] = -1
-        if self.dtype != np.float64:
-            labels = labels.astype(self.dtype)
         self._set("labels", labels)
         return features, labels
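
Unlike SimuLinReg above, labels is allocated explicitly here, so it can be created with the requested dtype from the start; the integer binomial draws are cast on assignment and the old float64-then-astype round trip disappears. A quick check of that behaviour:

    import numpy as np

    p = np.full(4, 0.5)
    labels = np.empty(4, dtype=np.float32)
    labels[:] = np.random.binomial(1, p, size=4)  # ints cast to float32 here
    labels[labels == 0] = -1
    print(labels.dtype)  # float32, no astype needed
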