diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 44ff171a..a30ac7a1 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -2,6 +2,7 @@ name: Daily runs on: schedule: - cron: '0 5 * * *' + workflow_dispatch: push: paths: - '.github/workflows/daily.yml' @@ -34,17 +35,20 @@ jobs: run: | # needed for tabmat echo "Install compilation dependencies" - micromamba install -y c-compiler cxx-compiler cython jemalloc-local libgomp mako xsimd + micromamba install -y c-compiler cxx-compiler 'cython!=3.0.4' jemalloc-local libgomp mako xsimd - PRE_WHEELS="https://pypi.anaconda.org/scipy-wheels-nightly/simple" + PRE_WHEELS="https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/" for pkg in numpy pandas scikit-learn scipy; do echo "Installing $pkg nightly" micromamba remove -y --force $pkg pip install --pre --no-deps --only-binary :all: --upgrade --timeout=60 -i $PRE_WHEELS $pkg done + echo Install pyarrow nightly + micromamba remove -y --force pyarrow + pip install --extra-index-url https://pypi.fury.io/arrow-nightlies/ --prefer-binary --pre --no-deps pyarrow echo Install tabmat nightly micromamba remove -y --force tabmat - pip install git+https://github.com/Quantco/tabmat + pip install --no-use-pep517 --no-deps git+https://github.com/Quantco/tabmat - name: Install repository shell: bash -el {0} run: pip install --no-use-pep517 --no-deps --disable-pip-version-check -e . diff --git a/src/glum/_distribution.py b/src/glum/_distribution.py index d142b396..6b52187e 100644 --- a/src/glum/_distribution.py +++ b/src/glum/_distribution.py @@ -616,7 +616,7 @@ class TweedieDistribution(ExponentialDispersionModel): :math:`0 < \mathrm{power} < 1`, no distribution exists. """ - upper_bound = np.Inf + upper_bound = np.inf include_upper_bound = False def __init__(self, power=0): @@ -630,7 +630,7 @@ def __eq__(self, other): # noqa D def lower_bound(self) -> float: """Return the lowest value of ``y`` allowed.""" if self.power <= 0: - return -np.Inf + return -np.inf if self.power >= 1: return 0 raise ValueError @@ -904,8 +904,8 @@ class GeneralizedHyperbolicSecant(ExponentialDispersionModel): The GHS distribution is for targets ``y`` in ``(-∞, +∞)``. """ - lower_bound = -np.Inf - upper_bound = np.Inf + lower_bound = -np.inf + upper_bound = np.inf include_lower_bound = False include_upper_bound = False @@ -1133,7 +1133,7 @@ class NegativeBinomialDistribution(ExponentialDispersionModel): """ lower_bound = 0 - upper_bound = np.Inf + upper_bound = np.inf include_lower_bound = True include_upper_bound = False diff --git a/src/glum/_glm.py b/src/glum/_glm.py index 758c370b..39ba0a89 100644 --- a/src/glum/_glm.py +++ b/src/glum/_glm.py @@ -929,7 +929,11 @@ def _make_grid(max_alpha: float) -> np.ndarray: warnings.warn("`min_alpha` is set. Ignoring `min_alpha_ratio`.") min_alpha = self.min_alpha return np.logspace( - np.log(max_alpha), np.log(min_alpha), self.n_alphas, base=np.e + np.log(max_alpha), + np.log(min_alpha), + self.n_alphas, + base=np.e, + dtype=X.dtype, ) if np.all(P1_no_alpha == 0): @@ -1631,7 +1635,7 @@ def _wald_test_matrix( # We want to calculate Rb_r^T (RVR)^{-1} Rb_r. # We can do it in a more numerically stable way by using `scipy.linalg.solve`: try: - test_stat = float(Rb_r.T @ linalg.solve(RVR, Rb_r)) + test_stat = (Rb_r.T @ linalg.solve(RVR, Rb_r))[0] except linalg.LinAlgError as err: raise linalg.LinAlgError("The restriction matrix is not full rank") from err p_value = 1 - stats.chi2.cdf(test_stat, Q) @@ -2286,7 +2290,7 @@ def _expand_categorical_penalties(penalty, X, drop_first): list( chain.from_iterable( [elmt for _ in dtype.categories[int(drop_first) :]] - if pd.api.types.is_categorical_dtype(dtype) + if isinstance(dtype, pd.CategoricalDtype) else [elmt] for elmt, dtype in zip(penalty, X.dtypes) ) diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py index b450d399..4cb208fe 100644 --- a/src/glum/_glm_cv.py +++ b/src/glum/_glm_cv.py @@ -454,7 +454,10 @@ def fit( if self.alphas is None: alphas = [self._get_alpha_path(l1, X, y, sample_weight) for l1 in l1_ratio] else: - alphas = np.tile(np.sort(self.alphas)[::-1], (len(l1_ratio), 1)) + alphas = np.tile( + np.sort(np.asarray(self.alphas, dtype=X.dtype))[::-1], + (len(l1_ratio), 1), + ) if len(l1_ratio) == 1: self.alphas_ = alphas[0] diff --git a/src/glum/_solvers.py b/src/glum/_solvers.py index dccc4433..e4752aa5 100644 --- a/src/glum/_solvers.py +++ b/src/glum/_solvers.py @@ -916,7 +916,7 @@ def _trust_constr_solver( # we express constraints in the form A theta <= b constraints = LinearConstraint( A=A_ineq_, - lb=-np.Inf, + lb=-np.inf, ub=b_ineq, ) else: diff --git a/src/glum/_util.py b/src/glum/_util.py index aabf7b8a..57a256c6 100644 --- a/src/glum/_util.py +++ b/src/glum/_util.py @@ -34,11 +34,11 @@ def _align_df_categories(df, dtypes) -> pd.DataFrame: categorical_dtypes = [ column for column, dtype in dtypes.items() - if pd.api.types.is_categorical_dtype(dtype) and (column in df) + if isinstance(dtype, pd.CategoricalDtype) and (column in df) ] for column in categorical_dtypes: - if not pd.api.types.is_categorical_dtype(df[column]): + if not isinstance(df[column].dtype, pd.CategoricalDtype): _logger.info(f"Casting {column} to categorical.") changed_dtypes[column] = df[column].astype(dtypes[column]) elif list(df[column].cat.categories) != list(dtypes[column].categories): diff --git a/src/glum_benchmarks/orig_sklearn_fork/_glm.py b/src/glum_benchmarks/orig_sklearn_fork/_glm.py index 143a00c6..c521e6fc 100644 --- a/src/glum_benchmarks/orig_sklearn_fork/_glm.py +++ b/src/glum_benchmarks/orig_sklearn_fork/_glm.py @@ -757,15 +757,15 @@ def power(self, power): if not isinstance(power, numbers.Real): raise TypeError("power must be a real number, input was {}".format(power)) - self._upper_bound = np.Inf + self._upper_bound = np.inf self._include_upper_bound = False if power < 0: # Extreme Stable - self._lower_bound = -np.Inf + self._lower_bound = -np.inf self._include_lower_bound = False elif power == 0: # NormalDistribution - self._lower_bound = -np.Inf + self._lower_bound = -np.inf self._include_lower_bound = False elif (power > 0) and (power < 1): raise ValueError("For 0