From ff2f83fe104880dbde73ccc63eeba3512a4f1d11 Mon Sep 17 00:00:00 2001 From: Robbie Muir Date: Tue, 9 Dec 2025 22:52:51 +0100 Subject: [PATCH 1/5] first attempt --- linopy/expressions.py | 80 ++++++++++++++++++++++++++++++++++ test/test_linear_expression.py | 60 +++++++++++++++++++++++++ 2 files changed, 140 insertions(+) diff --git a/linopy/expressions.py b/linopy/expressions.py index d60c8be5..5d5f0f21 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -1451,6 +1451,86 @@ def to_polars(self) -> pl.DataFrame: check_has_nulls_polars(df, name=self.type) return df + def simplify(self) -> LinearExpression: + """ + Simplify the linear expression by combining terms with the same variable. + + This method finds all terms that reference the same variable and adds + their coefficients together, reducing the number of terms in the expression. + + Returns + ------- + LinearExpression + A new LinearExpression with combined terms. + + Examples + -------- + >>> from linopy import Model + >>> m = Model() + >>> x = m.add_variables(name="x") + >>> expr = 2 * x + 3 * x # Creates two terms + >>> simplified = expr.simplify() # Combines into one term: 5 * x + """ + + def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: + """Simplify a single row by grouping vars and summing coefficients. + + Returns a 2D array of shape (2, input_len) where first row is vars, second is coeffs. + """ + input_len = len(vars_row) + + # Filter out invalid entries + mask = (vars_row != -1) & (coeffs_row != 0) & ~np.isnan(coeffs_row) + valid_vars = vars_row[mask] + valid_coeffs = coeffs_row[mask] + + if len(valid_vars) == 0: + # Return arrays filled with -1 and 0.0, same length as input + return np.vstack([ + np.full(input_len, -1, dtype=float), + np.zeros(input_len, dtype=float) + ]) + + # Use bincount to sum coefficients for each variable ID efficiently + max_var = int(valid_vars.max()) + summed = np.bincount(valid_vars, weights=valid_coeffs, minlength=max_var + 1) + + # Get non-zero entries + unique_vars = np.where(summed != 0)[0] + unique_coeffs = summed[unique_vars] + + # Pad to match input length + result_vars = np.full(input_len, -1, dtype=float) + result_coeffs = np.zeros(input_len, dtype=float) + + n_unique = len(unique_vars) + result_vars[:n_unique] = unique_vars + result_coeffs[:n_unique] = unique_coeffs + + return np.vstack([result_vars, result_coeffs]) + + # Stack vars and coeffs, apply simplification once, then unstack + combined = xr.apply_ufunc( + _simplify_row, + self.vars, + self.coeffs, + input_core_dims=[[TERM_DIM], [TERM_DIM]], + output_core_dims=[["_field", TERM_DIM]], + vectorize=True, + ) + + # Extract vars and coeffs from the combined result + vars_simplified = combined.isel(_field=0).astype(int) + coeffs_simplified = combined.isel(_field=1) + + # Create new dataset with simplified data + new_data = self.data.copy() + new_data = assign_multiindex_safe( + new_data, vars=vars_simplified, coeffs=coeffs_simplified + ) + + return LinearExpression(new_data, self.model).densify_terms() + @classmethod def _from_scalarexpression_list( cls, diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index 2551c203..71301103 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -1191,3 +1191,63 @@ def test_cumsum(m: Model, multiple: float) -> None: expr = m.variables["x"] + m.variables["y"] cumsum = (multiple * expr).cumsum() cumsum.nterm == 2 + + +def test_simplify_basic(x: Variable) -> None: + """Test basic simplification with duplicate terms.""" + expr = 2 * x + 3 * x + simplified = expr.simplify() + assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}" + + # Check that the coefficient is 5 + coeffs: np.ndarray = simplified.coeffs.values + assert len(coeffs) == 1, f"Expected 1 valid coefficient, got {len(coeffs)}" + assert all(coeffs == 5.0), f"Expected coefficient 5.0, got {coeffs[0]}" + + +def test_simplify_array(x: Variable) -> None: + """Test simplification with array variables.""" + # Create expression with duplicate terms + expr = 2 * x + 3 * x + x + # Simplify + simplified = expr.simplify() + assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}" + assert all(simplified.coeffs.values == 6), ( + f"Expected coefficients of 6, got {simplified.coeffs.values}" + ) + + +def test_simplify_with_different_variables(x: Variable, y: Variable) -> None: + """Test that different variables are kept separate.""" + # Create expression: 2*x + 3*x + 4*y + expr = 2 * x + 3 * x + 4 * y + + # Simplify + simplified = expr.simplify() + # Should have 2 terms (one for x with coeff 5, one for y with coeff 4) + assert simplified.nterm == 2, f"Expected 2 terms, got {simplified.nterm}" + + coeffs: np.ndarray = simplified.coeffs.values + assert len(coeffs) == 2, f"Expected 2 valid coefficients, got {len(coeffs)}" + # Check that coefficients are 5 and 4 (in some order) + assert set(coeffs) == {5.0, 4.0}, ( + f"Expected coefficients {{5.0, 4.0}}, got {set(coeffs)}" + ) + + +def test_simplify_with_constant(x: Variable) -> None: + """Test that constants are preserved.""" + expr = 2 * x + 3 * x + 10 + + # Simplify + simplified = expr.simplify() + + # Check constant is preserved + assert all(simplified.const.values == 10.0), ( + f"Expected constant 10.0, got {simplified.const.values}" + ) + + # Check coefficients + assert all(simplified.coeffs.values == 5.0), ( + f"Expected coefficient 5.0, got {simplified.coeffs.values}" + ) From c65098fd4ef6e3f9a6633908fdae38124d56f807 Mon Sep 17 00:00:00 2001 From: Robbie Muir Date: Tue, 9 Dec 2025 23:20:11 +0100 Subject: [PATCH 2/5] formatting --- doc/release_notes.rst | 1 + linopy/constants.py | 2 ++ linopy/expressions.py | 51 +++++++++++++++++++++------------- test/test_linear_expression.py | 22 +++++++++------ 4 files changed, 47 insertions(+), 29 deletions(-) diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 5ca5ecc7..68d21caf 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -3,6 +3,7 @@ Release Notes .. Upcoming Version +* Add simplify method to LinearExpression to combine duplicate terms * Fix compatibility for xpress versions below 9.6 (regression) * Performance: Up to 50x faster ``repr()`` for variables/constraints via O(log n) label lookup and direct numpy indexing * Performance: Up to 46x faster ``ncons`` property by replacing ``.flat.labels.unique()`` with direct counting diff --git a/linopy/constants.py b/linopy/constants.py index 3f6886ec..021a9a10 100644 --- a/linopy/constants.py +++ b/linopy/constants.py @@ -39,12 +39,14 @@ GROUP_DIM = "_group" FACTOR_DIM = "_factor" CONCAT_DIM = "_concat" +CV_DIM = "_cv" HELPER_DIMS: list[str] = [ TERM_DIM, STACKED_TERM_DIM, GROUPED_TERM_DIM, FACTOR_DIM, CONCAT_DIM, + CV_DIM, ] diff --git a/linopy/expressions.py b/linopy/expressions.py index 5d5f0f21..1fc55f32 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -66,6 +66,7 @@ ) from linopy.config import options from linopy.constants import ( + CV_DIM, EQUAL, FACTOR_DIM, GREATER_EQUAL, @@ -1473,12 +1474,13 @@ def simplify(self) -> LinearExpression: """ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: - """Simplify a single row by grouping vars and summing coefficients. - + """ + Simplify a single row by grouping vars and summing coefficients. + Returns a 2D array of shape (2, input_len) where first row is vars, second is coeffs. """ input_len = len(vars_row) - + # Filter out invalid entries mask = (vars_row != -1) & (coeffs_row != 0) & ~np.isnan(coeffs_row) valid_vars = vars_row[mask] @@ -1486,50 +1488,59 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: if len(valid_vars) == 0: # Return arrays filled with -1 and 0.0, same length as input - return np.vstack([ - np.full(input_len, -1, dtype=float), - np.zeros(input_len, dtype=float) - ]) + return np.vstack( + [ + np.full(input_len, -1, dtype=float), + np.zeros(input_len, dtype=float), + ] + ) # Use bincount to sum coefficients for each variable ID efficiently max_var = int(valid_vars.max()) - summed = np.bincount(valid_vars, weights=valid_coeffs, minlength=max_var + 1) + summed = np.bincount( + valid_vars, weights=valid_coeffs, minlength=max_var + 1 + ) # Get non-zero entries unique_vars = np.where(summed != 0)[0] unique_coeffs = summed[unique_vars] - + # Pad to match input length result_vars = np.full(input_len, -1, dtype=float) result_coeffs = np.zeros(input_len, dtype=float) - + n_unique = len(unique_vars) result_vars[:n_unique] = unique_vars result_coeffs[:n_unique] = unique_coeffs return np.vstack([result_vars, result_coeffs]) - # Stack vars and coeffs, apply simplification once, then unstack - combined = xr.apply_ufunc( + # Coeffs and vars have dimensions (.., TERM_DIM) + # A row-wise operation is applied over the .. dimensions on both coeffs and vars, which are stacked together over a new "CV_DIM" dimension + combined: xr.DataArray = xr.apply_ufunc( _simplify_row, self.vars, self.coeffs, input_core_dims=[[TERM_DIM], [TERM_DIM]], - output_core_dims=[["_field", TERM_DIM]], + output_core_dims=[[CV_DIM, TERM_DIM]], vectorize=True, ) - + # Combined has dimensions (.., CV_DIM, TERM_DIM) + + # Drop terms where all vars are -1 (i.e., empty terms across all positions) + vars = combined.isel({CV_DIM: 0}).astype(int) + non_empty_terms = (vars != -1).any(dim=[d for d in vars.dims if d != TERM_DIM]) + combined = combined.isel({TERM_DIM: non_empty_terms}) + # Extract vars and coeffs from the combined result - vars_simplified = combined.isel(_field=0).astype(int) - coeffs_simplified = combined.isel(_field=1) + vars = combined.isel({CV_DIM: 0}).astype(int) + coeffs = combined.isel({CV_DIM: 1}) # Create new dataset with simplified data new_data = self.data.copy() - new_data = assign_multiindex_safe( - new_data, vars=vars_simplified, coeffs=coeffs_simplified - ) + new_data = assign_multiindex_safe(new_data, vars=vars, coeffs=coeffs) - return LinearExpression(new_data, self.model).densify_terms() + return LinearExpression(new_data, self.model) @classmethod def _from_scalarexpression_list( diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index 71301103..19ef5d44 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -1195,23 +1195,29 @@ def test_cumsum(m: Model, multiple: float) -> None: def test_simplify_basic(x: Variable) -> None: """Test basic simplification with duplicate terms.""" - expr = 2 * x + 3 * x + expr = 2 * x + 3 * x + 1 * x simplified = expr.simplify() assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}" + x_len = len(x.coords["dim_0"]) # Check that the coefficient is 5 coeffs: np.ndarray = simplified.coeffs.values - assert len(coeffs) == 1, f"Expected 1 valid coefficient, got {len(coeffs)}" - assert all(coeffs == 5.0), f"Expected coefficient 5.0, got {coeffs[0]}" + assert len(coeffs) == x_len, f"Expected {x_len} coefficients, got {len(coeffs)}" + assert all(coeffs == 6.0), f"Expected coefficient 5.0, got {coeffs[0]}" -def test_simplify_array(x: Variable) -> None: - """Test simplification with array variables.""" - # Create expression with duplicate terms +def test_simplify_multiple_dimensions() -> None: + model = Model() + a_index = pd.Index([0, 1, 2, 3], name="a") + b_index = pd.Index([0, 1, 2], name="b") + coords = [a_index, b_index] + x = model.add_variables(name="x", coords=coords) + expr = 2 * x + 3 * x + x # Simplify simplified = expr.simplify() assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}" + assert simplified.ndim == 2, f"Expected 2 dimensions, got {simplified.ndim}" assert all(simplified.coeffs.values == 6), ( f"Expected coefficients of 6, got {simplified.coeffs.values}" ) @@ -1227,9 +1233,7 @@ def test_simplify_with_different_variables(x: Variable, y: Variable) -> None: # Should have 2 terms (one for x with coeff 5, one for y with coeff 4) assert simplified.nterm == 2, f"Expected 2 terms, got {simplified.nterm}" - coeffs: np.ndarray = simplified.coeffs.values - assert len(coeffs) == 2, f"Expected 2 valid coefficients, got {len(coeffs)}" - # Check that coefficients are 5 and 4 (in some order) + coeffs: list[float] = simplified.coeffs.values.flatten().tolist() assert set(coeffs) == {5.0, 4.0}, ( f"Expected coefficients {{5.0, 4.0}}, got {set(coeffs)}" ) From 3876ef797c3883183b18b345bbf7a54aef805fb6 Mon Sep 17 00:00:00 2001 From: Robbie Muir Date: Tue, 9 Dec 2025 23:24:50 +0100 Subject: [PATCH 3/5] updated doc --- linopy/expressions.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/linopy/expressions.py b/linopy/expressions.py index 1fc55f32..539500a7 100644 --- a/linopy/expressions.py +++ b/linopy/expressions.py @@ -1475,9 +1475,7 @@ def simplify(self) -> LinearExpression: def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: """ - Simplify a single row by grouping vars and summing coefficients. - - Returns a 2D array of shape (2, input_len) where first row is vars, second is coeffs. + For a given combination of expression coordinates, try to simplify by reducing duplicate variables """ input_len = len(vars_row) @@ -1515,8 +1513,8 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: return np.vstack([result_vars, result_coeffs]) - # Coeffs and vars have dimensions (.., TERM_DIM) - # A row-wise operation is applied over the .. dimensions on both coeffs and vars, which are stacked together over a new "CV_DIM" dimension + # Coeffs and vars have dimensions (.., TERM_DIM) where .. are the coordinate dimensions of the expression + # An operation is applied over the coordinate dimensions on both coeffs and vars, which are stacked together over a new "CV_DIM" dimension combined: xr.DataArray = xr.apply_ufunc( _simplify_row, self.vars, @@ -1527,7 +1525,7 @@ def _simplify_row(vars_row: np.ndarray, coeffs_row: np.ndarray) -> np.ndarray: ) # Combined has dimensions (.., CV_DIM, TERM_DIM) - # Drop terms where all vars are -1 (i.e., empty terms across all positions) + # Drop terms where all vars are -1 (i.e., empty terms across all coordinates) vars = combined.isel({CV_DIM: 0}).astype(int) non_empty_terms = (vars != -1).any(dim=[d for d in vars.dims if d != TERM_DIM]) combined = combined.isel({TERM_DIM: non_empty_terms}) From 967e540df40c32ef1a80d88bfe6c2b4c35de69ca Mon Sep 17 00:00:00 2001 From: Robbie Muir Date: Wed, 10 Dec 2025 09:08:26 +0100 Subject: [PATCH 4/5] fixed test --- test/test_linear_expression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index 19ef5d44..fe77e3b3 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -1218,7 +1218,7 @@ def test_simplify_multiple_dimensions() -> None: simplified = expr.simplify() assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}" assert simplified.ndim == 2, f"Expected 2 dimensions, got {simplified.ndim}" - assert all(simplified.coeffs.values == 6), ( + assert all(simplified.coeffs.values.reshape(-1) == 6), ( f"Expected coefficients of 6, got {simplified.coeffs.values}" ) From 8f78526be6646d702415414c96d2efe04807e6a4 Mon Sep 17 00:00:00 2001 From: Fabian Date: Tue, 16 Dec 2025 14:14:21 +0100 Subject: [PATCH 5/5] fix simplify tests: correct comment and add cancellation tests - Fix misleading comment/error message (coefficient is 6, not 5) - Add test for full cancellation (x - x = 0) - Add test for partial cancellation (2x - 2x + 3y = 3y) --- test/test_linear_expression.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py index fe77e3b3..5a05e3b6 100644 --- a/test/test_linear_expression.py +++ b/test/test_linear_expression.py @@ -1200,10 +1200,10 @@ def test_simplify_basic(x: Variable) -> None: assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}" x_len = len(x.coords["dim_0"]) - # Check that the coefficient is 5 + # Check that the coefficient is 6 (2 + 3 + 1) coeffs: np.ndarray = simplified.coeffs.values assert len(coeffs) == x_len, f"Expected {x_len} coefficients, got {len(coeffs)}" - assert all(coeffs == 6.0), f"Expected coefficient 5.0, got {coeffs[0]}" + assert all(coeffs == 6.0), f"Expected coefficient 6.0, got {coeffs[0]}" def test_simplify_multiple_dimensions() -> None: @@ -1255,3 +1255,24 @@ def test_simplify_with_constant(x: Variable) -> None: assert all(simplified.coeffs.values == 5.0), ( f"Expected coefficient 5.0, got {simplified.coeffs.values}" ) + + +def test_simplify_cancellation(x: Variable) -> None: + """Test that terms cancel out correctly when coefficients sum to zero.""" + expr = x - x + simplified = expr.simplify() + + assert simplified.nterm == 0, f"Expected 0 terms, got {simplified.nterm}" + assert simplified.coeffs.values.size == 0 + assert simplified.vars.values.size == 0 + + +def test_simplify_partial_cancellation(x: Variable, y: Variable) -> None: + """Test partial cancellation where some terms cancel but others remain.""" + expr = 2 * x - 2 * x + 3 * y + simplified = expr.simplify() + + assert simplified.nterm == 1, f"Expected 1 term, got {simplified.nterm}" + assert all(simplified.coeffs.values == 3.0), ( + f"Expected coefficient 3.0, got {simplified.coeffs.values}" + )