Skip to content

Commit

Permalink
feat: pretty-print sequence index
Browse files (browse the repository at this point in the history)
  • Loading branch information
MilesCranmer committed Aug 26, 2024
1 parent 63e7785 commit 516e4e6
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 28 deletions.
40 changes: 26 additions & 14 deletions pysr/regressor_sequence.py
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
import warnings
from typing import List, Optional, Union
from typing import List, Optional, Tuple, Union

import numpy as np
from sklearn.base import BaseEstimator

from .sr import PySRRegressor
from .utils import ArrayLike
from .utils import ArrayLike, _subscriptify


def _check_assertions(
Expand Down Expand Up @@ -67,22 +67,33 @@ def __init__(

def _construct_variable_names(
self, n_features: int, variable_names: Optional[List[str]]
):
) -> Tuple[List[str], List[str]]:
if not isinstance(variable_names, list):
if n_features == 1:
return [f"x_t{i}" for i in range(self.recursive_history_length, 0, -1)]
variable_names = ["x"]
display_variable_names = ["x"]
else:
return [
f"x{i}_t{j}"
for j in range(self.recursive_history_length, 0, -1)
for i in range(n_features)
variable_names = [f"x{i}" for i in range(n_features)]
display_variable_names = [
f"x{_subscriptify(i)}" for i in range(n_features)
]
else:
return [
i + "_t" + str(j)
for j in range(self.recursive_history_length, 0, -1)
for i in variable_names
]
display_variable_names = variable_names

# e.g., `x0_tm1`
variable_names_with_time = [
f"{var}_tm{j}"
for j in range(self.recursive_history_length, 0, -1)
for var in variable_names
]
# e.g., `x₀[t-1]`
display_variable_names_with_time = [
f"{var}[t-{j}]"
for j in range(self.recursive_history_length, 0, -1)
for var in display_variable_names
]

return variable_names_with_time, display_variable_names_with_time

def fit(
self,
Expand Down Expand Up @@ -150,7 +161,7 @@ def fit(
y_units = X_units
if isinstance(weights, np.ndarray):
weights = weights[self.recursive_history_length :]
variable_names = self._construct_variable_names(
variable_names, display_variable_names = self._construct_variable_names(
current_X.shape[1], variable_names
)

Expand All @@ -159,6 +170,7 @@ def fit(
y=current_X,
weights=weights,
variable_names=variable_names,
display_variable_names=display_variable_names,
X_units=X_units,
y_units=y_units,
complexity_of_variables=complexity_of_variables,
Expand Down
87 changes: 73 additions & 14 deletions pysr/sr.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -138,6 +138,7 @@ def _check_assertions(
X,
use_custom_variable_names,
variable_names,
display_variable_names,
complexity_of_variables,
weights,
y,
Expand All @@ -153,6 +154,7 @@ def _check_assertions(
assert X.shape[0] == weights.shape[0]
if use_custom_variable_names:
assert len(variable_names) == X.shape[1]
assert len(display_variable_names) == X.shape[1]
# Check none of the variable names are function names:
for var_name in variable_names:
# Check if alphanumeric only:
Expand Down Expand Up @@ -1361,6 +1363,7 @@ def _validate_and_set_fit_params(
Xresampled,
weights,
variable_names,
display_variable_names,
complexity_of_variables,
X_units,
y_units,
Expand All @@ -1370,6 +1373,7 @@ def _validate_and_set_fit_params(
Optional[ndarray],
Optional[ndarray],
ArrayLike[str],
Optional[ArrayLike[str]],
Union[int, float, List[Union[int, float]]],
Optional[ArrayLike[str]],
Optional[Union[str, ArrayLike[str]]],
Expand All @@ -1395,6 +1399,8 @@ def _validate_and_set_fit_params(
for that particular element of y.
variable_names : ndarray of length n_features
Names of each feature in the training dataset, `X`.
display_variable_names : ndarray of length n_features
Custom variable names to display in the progress bar output.
complexity_of_variables : int | float | list[int | float]
Complexity of each feature in the training dataset, `X`.
X_units : list[str] of length n_features
Expand All @@ -1412,12 +1418,21 @@ def _validate_and_set_fit_params(
Validated resampled training data used for denoising.
variable_names_validated : list[str] of length n_features
Validated list of variable names for each feature in `X`.
display_variable_names_validated : list[str] of length n_features
Validated list of variable names to display in the progress bar output.
X_units : list[str] of length n_features
Validated units for `X`.
y_units : str | list[str] of length n_out
Validated units for `y`.
"""
if display_variable_names is not None:
assert (
variable_names is not None
), "`variable_names` must be provided if `display_variable_names` is provided."
assert len(display_variable_names) == len(
variable_names
), "`display_variable_names` must be the same length as `variable_names`."
if isinstance(X, pd.DataFrame):
if variable_names:
variable_names = None
Expand Down Expand Up @@ -1478,9 +1493,14 @@ def _validate_and_set_fit_params(
[f"x{_subscriptify(i)}" for i in range(X.shape[1])]
)
variable_names = self.feature_names_in_
display_variable_names = self.display_feature_names_in_
else:
self.display_feature_names_in_ = self.feature_names_in_
if display_variable_names is None:
self.display_feature_names_in_ = self.feature_names_in_
else:
self.display_feature_names_in_ = display_variable_names
variable_names = self.feature_names_in_
display_variable_names = self.display_feature_names_in_

# Handle multioutput data
if len(y.shape) == 1 or (len(y.shape) == 2 and y.shape[1] == 1):
Expand All @@ -1500,6 +1520,7 @@ def _validate_and_set_fit_params(
Xresampled,
weights,
variable_names,
display_variable_names,
complexity_of_variables,
X_units,
y_units,
Expand All @@ -1519,6 +1540,7 @@ def _pre_transform_training_data(
y: ndarray,
Xresampled: Union[ndarray, None],
variable_names: ArrayLike[str],
display_variable_names: ArrayLike[str],
complexity_of_variables: Union[int, float, List[Union[int, float]]],
X_units: Union[ArrayLike[str], None],
y_units: Union[ArrayLike[str], str, None],
Expand All @@ -1542,6 +1564,9 @@ def _pre_transform_training_data(
variable_names : list[str]
Names of each variable in the training dataset, `X`.
Of length `n_features`.
display_variable_names : list[str]
Custom variable names to display in the progress bar output.
Of length `n_features`.
complexity_of_variables : int | float | list[int | float]
Complexity of each variable in the training dataset, `X`.
X_units : list[str]
Expand Down Expand Up @@ -1569,6 +1594,8 @@ def _pre_transform_training_data(
variable_names_transformed : list[str] of length n_features
Names of each variable in the transformed dataset,
`X_transformed`.
display_variable_names_transformed : list[str] of length n_features
Custom variable names to display in the progress bar output.
X_units_transformed : list[str] of length n_features
Units of each variable in the transformed dataset.
y_units_transformed : str | list[str] of length n_out
Expand All @@ -1593,6 +1620,14 @@ def _pre_transform_training_data(
if selection_mask[i]
],
)
display_variable_names = cast(
ArrayLike[str],
[
display_variable_names[i]
for i in range(len(display_variable_names))
if selection_mask[i]
],
)

if isinstance(complexity_of_variables, list):
complexity_of_variables = [
Expand All @@ -1614,7 +1649,7 @@ def _pre_transform_training_data(
# Update feature names with selected variable names
self.selection_mask_ = selection_mask
self.feature_names_in_ = _check_feature_names_in(self, variable_names)
self.display_feature_names_in_ = self.feature_names_in_
self.display_feature_names_in_ = display_variable_names
print(f"Using features {self.feature_names_in_}")

# Denoising transformation
Expand All @@ -1626,7 +1661,15 @@ def _pre_transform_training_data(
else:
X, y = denoise(X, y, Xresampled=Xresampled, random_state=random_state)

return X, y, variable_names, complexity_of_variables, X_units, y_units
return (
X,
y,
variable_names,
display_variable_names,
complexity_of_variables,
X_units,
y_units,
)

def _run(
self,
Expand Down Expand Up @@ -1934,6 +1977,7 @@ def fit(
Xresampled=None,
weights=None,
variable_names: Optional[ArrayLike[str]] = None,
display_variable_names: Optional[ArrayLike[str]] = None,
complexity_of_variables: Optional[
Union[int, float, List[Union[int, float]]]
] = None,
Expand Down Expand Up @@ -1966,6 +2010,11 @@ def fit(
instead of `variable_names`. Cannot contain spaces or special
characters. Avoid variable names which are also
function names in `sympy`, such as "N".
display_variable_names : list[str]
Custom variable names to display in the progress bar output, if
different from `variable_names`. For example, if you want to print
specific unicode characters which are not allowed in `variable_names`,
you can use `display_variable_names` to specify the names.
X_units : list[str]
A list of units for each variable in `X`. Each unit should be
a string representing a Julia expression. See DynamicQuantities.jl
Expand Down Expand Up @@ -2011,6 +2060,7 @@ def fit(
Xresampled,
weights,
variable_names,
display_variable_names,
complexity_of_variables,
X_units,
y_units,
Expand All @@ -2020,6 +2070,7 @@ def fit(
Xresampled,
weights,
variable_names,
display_variable_names,
complexity_of_variables,
X_units,
y_units,
Expand All @@ -2040,17 +2091,24 @@ def fit(
seed = cast(int, random_state.randint(0, 2**31 - 1)) # For julia random

# Pre transformations (feature selection and denoising)
X, y, variable_names, complexity_of_variables, X_units, y_units = (
self._pre_transform_training_data(
X,
y,
Xresampled,
variable_names,
complexity_of_variables,
X_units,
y_units,
random_state,
)
(
X,
y,
variable_names,
display_variable_names,
complexity_of_variables,
X_units,
y_units,
) = self._pre_transform_training_data(
X,
y,
Xresampled,
variable_names,
display_variable_names,
complexity_of_variables,
X_units,
y_units,
random_state,
)

# Warn about large feature counts (still warn if feature count is large
Expand All @@ -2071,6 +2129,7 @@ def fit(
X,
use_custom_variable_names,
variable_names,
display_variable_names,
complexity_of_variables,
weights,
y,
Expand Down

0 comments on commit 516e4e6

Please sign in to comment.