
Commit c062638
#185 add float types #3 - Post second review with more dtypes tests for prox/linear_model
PhilipDeegan committed May 9, 2018
1 parent cfd7775 commit c062638
Showing 77 changed files with 986 additions and 553 deletions.
4 changes: 3 additions & 1 deletion lib/cpp/prox/prox_slope.cpp
@@ -8,7 +8,9 @@ void TProxSlope<T>::compute_weights(void) {
ulong size = end - start;
weights = Array<T>(size);
for (ulong i = 0; i < size; i++) {
- T tmp = false_discovery_rate / (2 * size);
+ // tmp is double because float does not provide adequate precision for
+ // standard_normal_inv_cdf
+ double tmp = false_discovery_rate / (2 * size);
weights[i] = strength * standard_normal_inv_cdf(1 - tmp * (i + 1));
}
weights_ready = true;
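For context (not part of the commit), a minimal Python sketch of the precision issue the new comment points at, with scipy.stats.norm.ppf standing in for standard_normal_inv_cdf:

import numpy as np
from scipy.stats import norm  # stand-in for standard_normal_inv_cdf

tmp = 1e-8  # e.g. false_discovery_rate / (2 * size) for a large problem
arg32 = np.float32(1) - np.float32(tmp)  # 1e-8 is below float32 resolution near 1.0, so this rounds to exactly 1.0
arg64 = np.float64(1) - np.float64(tmp)  # 0.99999999
print(arg32, norm.ppf(arg32))  # 1.0 inf  -> the weight blows up
print(arg64, norm.ppf(arg64))  # 0.99999999 and a finite quantile around 5.6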
4 changes: 3 additions & 1 deletion lib/cpp/prox/prox_sorted_l1.cpp
@@ -121,7 +121,9 @@ T TProxSortedL1<T>::value(const Array<T> &coeffs, ulong start, ulong end) {
// Sort sub_coeffs with decreasing absolute values, and keeping sorting
// indexes in idx
Array<T> sub_coeffs_sorted = sort_abs(sub_coeffs, idx, false);
- T val = 0;
+ // val is double because float does not provide adequate precision for the sum,
+ // at least in the tests
+ double val = 0;
for (ulong i = 0; i < size; i++) {
val += weights[i] * std::abs(sub_coeffs_sorted[i]);
}
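Again as context only (not from the diff), a rough illustration of why the accumulator stays double even when T is float:

import numpy as np

rng = np.random.RandomState(0)
weights = rng.rand(100000).astype(np.float32)
coeffs = rng.randn(100000).astype(np.float32)

acc32, acc64 = np.float32(0.0), np.float64(0.0)
for w, c in zip(weights, coeffs):
    acc32 = acc32 + w * np.abs(c)                        # running float32 sum, like a float accumulator
    acc64 = acc64 + np.float64(w) * np.abs(np.float64(c))
print(acc32, acc64, abs(acc32 - acc64) / acc64)          # relative gap typically around 1e-6 to 1e-5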
5 changes: 2 additions & 3 deletions lib/include/tick/prox/prox.h
@@ -112,13 +112,12 @@ inline std::ostream& operator<<(std::ostream& s, const TProx<T>& p) {
return s << typeid(p).name() << "<" << typeid(T).name() << ">";
}

- using Prox = TProx<double>;
- using ProxPtr = std::shared_ptr<Prox>;

using ProxDouble = TProx<double>;
using ProxDoublePtr = std::shared_ptr<ProxDouble>;
using ProxDoublePtrVector = std::vector<ProxDoublePtr>;

using ProxFloat = TProx<float>;
using ProxFloatPtr = std::shared_ptr<ProxFloat>;
using ProxFloatPtrVector = std::vector<ProxFloatPtr>;

#endif // LIB_INCLUDE_TICK_PROX_PROX_H_
1 change: 0 additions & 1 deletion lib/include/tick/prox/prox_multi.h
@@ -51,7 +51,6 @@ class DLL_PUBLIC TProxMulti : public TProx<T> {
BoolStrReport operator==(const TProxMulti<T>& that) { return compare(that); }
};

- using ProxMulti = TProxMulti<double>;
using ProxMultiDouble = TProxMulti<double>;
using ProxMultiFloat = TProxMulti<float>;

31 changes: 1 addition & 30 deletions lib/swig/prox/prox.i
@@ -35,35 +35,6 @@ class TProx {
virtual void set_positive(bool positive);
};

- %rename(Prox) TProx<double>;
- class TProx<double> {
- public:
- Prox(
- double strength,
- bool positive
- );
- Prox(
- double strength,
- unsigned long start,
- unsigned long end,
- bool positive
- );
- virtual void call(
- const ArrayDouble &coeffs,
- double step,
- ArrayDouble &out
- );
- virtual double value(const ArrayDouble &coeffs);
- virtual double get_strength() const;
- virtual void set_strength(double strength);
- virtual ulong get_start() const;
- virtual ulong get_end() const;
- virtual void set_start_end(ulong start, ulong end);
- virtual bool get_positive() const;
- virtual void set_positive(bool positive);
- };
- typedef TProx<double> Prox;

%rename(ProxDouble) TProx<double>;
class TProx<double> {
public:
@@ -92,7 +63,6 @@ class TProx<double> {
virtual void set_positive(bool positive);
};
typedef TProx<double> ProxDouble;

%rename(ProxDoublePtr) std::shared_ptr<ProxDouble>;
typedef std::shared_ptr<ProxDouble> ProxDoublePtr;

@@ -124,4 +94,5 @@ class TProx<float> {
virtual void set_positive(bool positive);
};
typedef TProx<float> ProxFloat;
+ %rename(ProxFloatPtr) std::shared_ptr<ProxFloat>;
typedef std::shared_ptr<ProxFloat> ProxFloatPtr;
3 changes: 0 additions & 3 deletions lib/swig/prox/prox_binarsity.i
@@ -24,9 +24,6 @@ class TProxBinarsity : public TProxWithGroups<T> {
bool compare(const TProxBinarsity<T> &that);
};

- %template(ProxBinarsity) TProxBinarsity<double>;
- typedef TProxBinarsity<double> ProxBinarsity;

%template(ProxBinarsityDouble) TProxBinarsity<double>;
typedef TProxBinarsity<double> ProxBinarsityDouble;

3 changes: 0 additions & 3 deletions lib/swig/prox/prox_group_l1.i
@@ -25,9 +25,6 @@ class TProxGroupL1 : public TProxWithGroups<T> {
bool compare(const TProxGroupL1<T> &that);
};

- %template(ProxGroupL1) TProxGroupL1<double>;
- typedef TProxGroupL1<double> ProxGroupL1;

%template(ProxGroupL1Double) TProxGroupL1<double>;
typedef TProxGroupL1<double> ProxGroupL1Double;

18 changes: 12 additions & 6 deletions lib/swig/prox/prox_multi.i
@@ -8,6 +8,7 @@
%include "prox.i"

%template(ProxDoublePtrVector) std::vector<ProxDoublePtr>;
+ %template(ProxFloatPtrVector) std::vector<ProxFloatPtr>;

template <class T>
class TProxMulti : public TProx<T> {
@@ -17,11 +18,16 @@ class TProxMulti : public TProx<T> {
bool compare(const TProxMulti<T> &that);
};

- %template(ProxMulti) TProxMulti<double>;
- typedef TProxMulti<double> ProxMulti;

- %template(ProxMultiDouble) TProxMulti<double>;
+ %rename(ProxMultiDouble) TProxMulti<double>;
+ class TProxMulti<double> : public TProx<double> {
+ public:
+ ProxMultiDouble(std::vector<std::shared_ptr<TProx<double> > > proxs);
+ };
typedef TProxMulti<double> ProxMultiDouble;

- %template(ProxMultiFloat) TProxMulti<float>;
- typedef TProxMulti<float> ProxMultiFloat;
+ %rename(ProxMultiFloat) TProxMulti<float>;
+ class TProxMulti<float> : public TProx<float> {
+ public:
+ ProxMultiFloat(std::vector<std::shared_ptr<TProx<float> > > proxs);
+ };
+ typedef TProxMulti<float> ProxMultiFloat;
3 changes: 0 additions & 3 deletions lib/swig/prox/prox_with_groups.i
@@ -14,9 +14,6 @@ class TProxWithGroups : public TProx<T> {
ulong start, ulong end, bool positive);
};

- %template(ProxWithGroups) TProxWithGroups<double>;
- typedef TProxWithGroups<double> ProxWithGroups;

%template(ProxWithGroupsDouble) TProxWithGroups<double>;
typedef TProxWithGroups<double> ProxWithGroupsDouble;

31 changes: 1 addition & 30 deletions setup.py
@@ -775,39 +775,10 @@ def initialize_options(self):
self.start_dir = '.'
self.added = {}

- # This function takes a full qualified class name and returns
- ## the class as type which can be used to construct the class
- def fullname(self, o):
- return getattr(sys.modules[o.__module__], o.__class__.__name__)

- def parameterize(self, klass, dtype):
- testnames = unittest.TestLoader().getTestCaseNames(klass)
- suite = unittest.TestSuite()
- clazz = self.fullname(klass)
- if clazz in self.added and dtype in self.added[clazz]:
- return suite
- if clazz not in self.added:
- self.added[clazz] = []
- self.added[clazz].append(dtype)
- for name in testnames:
- suite.addTest(clazz(name, dtype=dtype))
- return suite

def run(self):
- dtype_list = ["float64", "float32"]
loader = unittest.TestLoader()
alltests = loader.discover(self.start_dir, pattern="*_test.py")
- suite = unittest.TestSuite()
- for testsuite in alltests:
- for test in testsuite:
- if type(test).__name__ is not "_FailedTest":
- for t in test._tests:
- if type(t).__name__ is "SolverTest":
- for dt in dtype_list:
- suite.addTest(self.parameterize(t, dtype=dt))
- else:
- suite.addTest(t)
- result = unittest.TextTestRunner(verbosity=2).run(suite)
+ result = unittest.TextTestRunner(verbosity=2).run(alltests)
sys.exit(not result.wasSuccessful())


8 changes: 4 additions & 4 deletions tick/base/learner/learner_optim.py
@@ -54,7 +54,7 @@ class LearnerOptim(ABC, Base):
record_every : `int`, default=10
Record history information when ``n_iter`` (iteration number) is
a multiple of ``record_every``
Other Parameters
----------------
sdca_ridge_strength : `float`, default=1e-3
@@ -72,13 +72,13 @@ class LearnerOptim(ABC, Base):
random_state : int seed, RandomState instance, or None (default)
The seed that will be used by stochastic solvers. Used in 'sgd',
'svrg', and 'sdca' solvers
blocks_start : `numpy.array`, shape=(n_features,), default=None
The indices of the first column of each binarized feature blocks. It
corresponds to the ``feature_indices`` property of the
``FeaturesBinarizer`` preprocessing.
Used in 'binarsity' penalty
blocks_length : `numpy.array`, shape=(n_features,), default=None
The length of each binarized feature blocks. It corresponds to the
``n_values`` property of the ``FeaturesBinarizer`` preprocessing.
@@ -446,5 +446,5 @@ def sdca_ridge_strength(self, val):
self.solver, RuntimeWarning)

@staticmethod
- def _safe_array(X, dtype=np.float64):
+ def _safe_array(X, dtype="float64"):
return safe_array(X, dtype)
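Side note (not from the diff): the string spelling and the NumPy scalar type name the same dtype, so the new default is equivalent:

import numpy as np
assert np.dtype("float64") == np.dtype(np.float64)  # both resolve to the same dtype object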
35 changes: 35 additions & 0 deletions tick/base_model/model.py
@@ -147,3 +147,38 @@ def _loss(self, coeffs: np.ndarray) -> float:
"""Must be overloaded in child class
"""
pass

def _get_typed_class(self, dtype_or_object_with_dtype, dtype_map):
"""Deduce the dtype and whether the C++ _model should be (re)built
"""
import six
should_update_model = self._model is None
local_dtype = None
if (isinstance(dtype_or_object_with_dtype, six.string_types)
or isinstance(dtype_or_object_with_dtype, np.dtype)):
local_dtype = np.dtype(dtype_or_object_with_dtype)
elif hasattr(dtype_or_object_with_dtype, 'dtype'):
local_dtype = np.dtype(dtype_or_object_with_dtype.dtype)
else:
raise ValueError(("""
unsupported type used for model creation,
expects a dtype or an object with a dtype, type:
""" + self.__class__.__name__).strip())
if self.dtype != local_dtype:
should_update_model = True
self.dtype = local_dtype
if np.dtype(self.dtype) not in dtype_map:
raise ValueError("""dtype does not exist in
type map for """ + self.__class__.__name__.strip())
return (should_update_model, dtype_map[np.dtype(self.dtype)])

def astype(self, dtype_or_object_with_dtype):
new_model = self._build_cpp_model(dtype_or_object_with_dtype)
print("new_model", new_model)
if new_model is not None:
self._set('_model', new_model)
return self

def _build_cpp_model(self, dtype: str):
raise ValueError("""This function is expected to be
overridden in a subclass""".strip())
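To make the dispatch rule above concrete, here is a standalone sketch (simplified, not from the commit) of the dtype resolution that _get_typed_class performs before the dtype_map lookup:

import numpy as np

def resolve_dtype(dtype_or_object_with_dtype):
    # Accept a dtype name or np.dtype directly, or take .dtype from an array/model.
    if isinstance(dtype_or_object_with_dtype, (str, np.dtype)):
        return np.dtype(dtype_or_object_with_dtype)
    if hasattr(dtype_or_object_with_dtype, "dtype"):
        return np.dtype(dtype_or_object_with_dtype.dtype)
    raise ValueError("expects a dtype or an object with a dtype")

print(resolve_dtype("float32"))                     # float32
print(resolve_dtype(np.zeros(3, dtype="float64")))  # float64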
26 changes: 20 additions & 6 deletions tick/base_model/tests/generalized_linear_model.py
@@ -8,11 +8,21 @@


class TestGLM(unittest.TestCase):
+ def __init__(self, *args, dtype="float64", **kwargs):
+ unittest.TestCase.__init__(self, *args, **kwargs)
+ self.dtype = dtype
+ self.decimal_places = 7
+ if np.dtype(self.dtype) == np.dtype("float32"):
+ self.decimal_places = 3

def _test_grad(self, model, coeffs, delta_check_grad=1e-5,
delta_model_grad=1e-4):
"""Test that gradient is consistent with loss and that minimum is
achievable with a small gradient
"""
+ # this check hands the loss and gradient off to scipy, which always works in float64
+ if coeffs.dtype is not np.dtype("float64"):
+ return
self.assertAlmostEqual(
check_grad(model.loss, model.grad, coeffs), 0.,
delta=delta_check_grad)
@@ -24,7 +34,7 @@ def _test_grad(self, model, coeffs, delta_check_grad=1e-5,

def run_test_for_glm(self, model, model_spars=None, delta_check_grad=1e-5,
delta_model_grad=1e-4):
- coeffs = np.random.randn(model.n_coeffs)
+ coeffs = np.random.randn(model.n_coeffs).astype(self.dtype)
# dense case
self._test_grad(model, coeffs, delta_check_grad=delta_check_grad,
delta_model_grad=delta_model_grad)
Expand All @@ -36,11 +46,13 @@ def run_test_for_glm(self, model, model_spars=None, delta_check_grad=1e-5,
# Check that loss computed in the dense and sparse case are
# the same
self.assertAlmostEqual(
- model.loss(coeffs), model_spars.loss(coeffs))
+ model.loss(coeffs), model_spars.loss(coeffs),
+ places=self.decimal_places)
# Check that gradients computed in the dense and sparse
# case are the same
np.testing.assert_almost_equal(
- model.grad(coeffs), model_spars.grad(coeffs), decimal=10)
+ model.grad(coeffs), model_spars.grad(coeffs),
+ decimal=self.decimal_places)

def _test_glm_intercept_vs_hardcoded_intercept(self, model):
# If the model has an intercept (ModelCoxReg does not for instance)
@@ -49,15 +61,17 @@ def _test_glm_intercept_vs_hardcoded_intercept(self, model):
if model.fit_intercept:
X = model.features
y = model.labels
- coeffs = np.random.randn(model.n_coeffs)
+ coeffs = np.random.randn(model.n_coeffs).astype(self.dtype)
grad1 = model.grad(coeffs)

- X_with_ones = np.hstack((X, np.ones((model.n_samples, 1))))
+ X_with_ones = np.hstack((X, np.ones((model.n_samples,
+ 1)))).astype(self.dtype)
model.fit_intercept = False
model.fit(X_with_ones, y)
grad2 = model.grad(coeffs)

- np.testing.assert_almost_equal(grad1, grad2, decimal=10)
+ np.testing.assert_almost_equal(grad1, grad2,
+ decimal=self.decimal_places)

# Put back model to its previous status
model.fit_intercept = True
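A small usage sketch (not part of the commit; it assumes the tests package is importable) showing how the new dtype keyword parameterizes a test case:

import unittest
import numpy as np
from tick.base_model.tests.generalized_linear_model import TestGLM

class DummyGLMTest(TestGLM):  # hypothetical concrete case, for illustration only
    def test_tolerance(self):
        expected = 3 if np.dtype(self.dtype) == np.dtype("float32") else 7
        self.assertEqual(self.decimal_places, expected)

suite = unittest.TestSuite()
for dt in ("float64", "float32"):
    suite.addTest(DummyGLMTest("test_tolerance", dtype=dt))
unittest.TextTestRunner(verbosity=2).run(suite)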
13 changes: 8 additions & 5 deletions tick/hawkes/model/build/__init__.py
@@ -7,12 +7,15 @@

from tick.base.opsys import add_to_path_if_windows


def required():
- import os, sys
- root = os.path.dirname(os.path.realpath(os.path.join(__file__, "../../..")))
+ import os, sys
+ root = os.path.dirname(
+ os.path.realpath(os.path.join(__file__, "../../..")))

- if "tick.base_model.build" not in sys.modules:
- p = os.path.realpath(os.path.join(root, "base_model/build"))
- os.environ["PATH"] = p + os.pathsep + os.environ["PATH"]

+ if "tick.base_model.build" not in sys.modules:
+ p = os.path.realpath(os.path.join(root, "base_model/build"))
+ os.environ["PATH"] = p + os.pathsep + os.environ["PATH"]

add_to_path_if_windows(__file__, [required])
20 changes: 18 additions & 2 deletions tick/linear_model/model_hinge.py
@@ -2,10 +2,16 @@

import numpy as np
from tick.base_model import ModelGeneralizedLinear, ModelFirstOrder
- from .build.linear_model import ModelHingeDouble as _ModelHinge
+ from .build.linear_model import ModelHingeDouble as _ModelHingeDouble
+ from .build.linear_model import ModelHingeFloat as _ModelHingeFloat

__author__ = 'Stephane Gaiffas'

+ dtype_map = {
+ np.dtype('float64'): _ModelHingeDouble,
+ np.dtype('float32'): _ModelHingeFloat
+ }


class ModelHinge(ModelFirstOrder, ModelGeneralizedLinear):
"""Hinge loss model for binary classification. This class gives first order
@@ -91,8 +97,10 @@ def fit(self, features, labels):
"""
ModelFirstOrder.fit(self, features, labels)
ModelGeneralizedLinear.fit(self, features, labels)

+ model_class = self._get_typed_class(features.dtype, dtype_map)[1]
self._set("_model",
- _ModelHinge(self.features, self.labels, self.fit_intercept,
+ model_class(self.features, self.labels, self.fit_intercept,
self.n_threads))
return self

@@ -101,3 +109,11 @@ def _grad(self, coeffs: np.ndarray, out: np.ndarray) -> None:

def _loss(self, coeffs: np.ndarray) -> float:
return self._model.loss(coeffs)

+ def _build_cpp_model(self, dtype_or_object_with_dtype):
+ (updated_model, model_class) = \
+ self._get_typed_class(dtype_or_object_with_dtype, dtype_map)
+ if updated_model is True:
+ return model_class(self.features, self.labels, self.fit_intercept,
+ self.n_threads)
+ return None
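Usage sketch (not part of the diff; it assumes the float32 path is wired through the rest of the base classes): the dtype of the features now picks the C++ backend when fit is called.

import numpy as np
from tick.linear_model import ModelHinge

X = np.random.randn(200, 5)
y = np.sign(np.random.randn(200))

m64 = ModelHinge(fit_intercept=True).fit(X, y)
m32 = ModelHinge(fit_intercept=True).fit(X.astype("float32"), y.astype("float32"))
print(type(m64._model).__name__)  # expected: ModelHingeDouble (peeking at the private attribute for illustration)
print(type(m32._model).__name__)  # expected: ModelHingeFloat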
