# Patch summary (text before the first "diff --git" header is ignored by patch tools):
#   * xgi/stats/__init__.py: ashist() now keeps the DataFrame returned by hist(), sets
#     df.attrs["xlabel"] = "Value", df.attrs["ylabel"] = "Probability" if density else "Count",
#     and df.attrs["title"] = "Histogram", then returns it; the docstring lists these attrs.
#   * tests/stats/test_core_stats_functions.py: adds test_ashist_attrs_exist,
#     test_ashist_density_labels, test_ashist_original_functionality and
#     test_ashist_single_unique_value.
#   * benchmarks/core.py: the removed and added lines are identical; the only change is that
#     the file no longer ends with a newline.
# NOTE(review): the benchmarks/core.py hunk and the added/removed blank lines look like
# incidental whitespace churn rather than part of the feature - confirm before merging.
diff --git a/benchmarks/core.py b/benchmarks/core.py index 062c2533..2aca9eb8 100644 --- a/benchmarks/core.py +++ b/benchmarks/core.py @@ -157,4 +157,4 @@ def setup(): def dual(H): H.dual() - benchmark.pedantic(dual, setup=setup, rounds=rounds) + benchmark.pedantic(dual, setup=setup, rounds=rounds) \ No newline at end of file diff --git a/tests/stats/test_core_stats_functions.py b/tests/stats/test_core_stats_functions.py index a5581f77..f0c5a36d 100644 --- a/tests/stats/test_core_stats_functions.py +++ b/tests/stats/test_core_stats_functions.py @@ -575,6 +575,68 @@ def test_issue_468(): assert H.edges.size.ashist().equals(df) + + +def test_ashist_attrs_exist(): + """Test that ashist returns DataFrame with expected attributes.""" + H = xgi.sunflower(3, 1, 20) + df = H.edges.size.ashist() + + # Check that all expected attributes exist + assert 'xlabel' in df.attrs + assert 'ylabel' in df.attrs + assert 'title' in df.attrs + + +def test_ashist_density_labels(): + """Test that ylabel changes based on density parameter.""" + H = xgi.sunflower(3, 1, 20) + + # Test default (density=False) + df_count = H.edges.size.ashist(density=False) + assert df_count.attrs['ylabel'] == 'Count' + + # Test with density=True + df_density = H.edges.size.ashist(density=True) + assert df_density.attrs['ylabel'] == 'Probability' + + +def test_ashist_original_functionality(): + """Test that adding attributes doesn't break original functionality.""" + H = xgi.sunflower(3, 1, 20) + df = H.edges.size.ashist() + + # Original test case should still pass + expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"]) + assert df.equals(expected_df) # Original functionality + + # And should have attributes + assert 'xlabel' in df.attrs + + + +def test_ashist_single_unique_value(): + """Test ashist when there is only one unique value and multiple bins.""" + H = xgi.Hypergraph() + H.add_nodes_from(range(5)) + # All edges have the same size + H.add_edges_from([[0, 1], [2, 3], [4, 0]]) + + # 
The edge sizes will all be 2 + df = H.edges.size.ashist(bins=10) + + # Since there's only one unique value, bins should be set to 1 + assert len(df) == 1 # Only one bin should be present + assert df['bin_center'].iloc[0] == 2 # The bin center should be the unique value + assert df['value'].iloc[0] == 3 # There are three edges of size 2 + + # Check that attributes are present + assert 'xlabel' in df.attrs + assert 'ylabel' in df.attrs + assert 'title' in df.attrs + + + ### Attribute statistics diff --git a/xgi/stats/__init__.py b/xgi/stats/__init__.py index 5e0e0648..bc82dd84 100644 --- a/xgi/stats/__init__.py +++ b/xgi/stats/__init__.py @@ -162,6 +162,7 @@ def aspandas(self): """ return pd.Series(self._val, name=self.name) + def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): """Return the distribution of a numpy array. @@ -180,7 +181,6 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): Whether to bin the values with log-sized bins. By default, False. - Returns ------- Pandas DataFrame @@ -189,6 +189,11 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): is True, outputs two additional columns, `bin_lo` and `bin_hi`, which outputs the left and right bin edges respectively. 
+ The DataFrame includes the following attributes: + - attrs['xlabel']: Label for x-axis + - attrs['ylabel']: 'Count' or 'Probability' based on density parameter + - attrs['title']: Plot title + Notes ----- Originally from https://github.com/jkbren/networks-and-dataviz @@ -199,7 +204,19 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): if isinstance(bins, int) and len(set(self.aslist())) == 1: bins = 1 - return hist(self.asnumpy(), bins, bin_edges, density, log_binning) + # Build the histogram, then tag it with plot-label metadata + + # Get the histogram DataFrame + df = hist(self.asnumpy(), bins, bin_edges, density, log_binning) + + # Add metadata attributes + df.attrs["xlabel"] = "Value" + df.attrs["ylabel"] = "Probability" if density else "Count" + df.attrs["title"] = "Histogram" + + return df + + def max(self): """The maximum value of this stat."""