Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metadata attributes to ashist() for improved plotting #635

Merged
merged 10 commits into from
Dec 10, 2024
2 changes: 1 addition & 1 deletion benchmarks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,4 @@ def setup():
def dual(H):
H.dual()

benchmark.pedantic(dual, setup=setup, rounds=rounds)
benchmark.pedantic(dual, setup=setup, rounds=rounds)
62 changes: 62 additions & 0 deletions tests/stats/test_core_stats_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,68 @@ def test_issue_468():
assert H.edges.size.ashist().equals(df)




def test_ashist_attrs_exist():
    """ashist() returns a DataFrame carrying the plotting metadata keys."""
    H = xgi.sunflower(3, 1, 20)
    hist_df = H.edges.size.ashist()

    # Every metadata key must be present in DataFrame.attrs.
    for key in ("xlabel", "ylabel", "title"):
        assert key in hist_df.attrs


def test_ashist_density_labels():
    """The 'ylabel' attribute follows the ``density`` argument."""
    H = xgi.sunflower(3, 1, 20)

    # Map each density setting to the y-axis label it should produce.
    expected_labels = {False: "Count", True: "Probability"}

    for density, label in expected_labels.items():
        hist_df = H.edges.size.ashist(density=density)
        assert hist_df.attrs["ylabel"] == label


def test_ashist_original_functionality():
    """The histogram data is unaffected by the added metadata attributes."""
    H = xgi.sunflower(3, 1, 20)
    result = H.edges.size.ashist()

    # The tabular content must match the plain two-column histogram.
    reference = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"])
    assert result.equals(reference)

    # The metadata rides along with the unchanged data.
    assert "xlabel" in result.attrs



def test_ashist_single_unique_value():
    """ashist() uses a single bin when all values are identical."""
    H = xgi.Hypergraph()
    H.add_nodes_from(range(5))
    # Three edges, each containing two nodes, so every edge size is 2.
    H.add_edges_from([[0, 1], [2, 3], [4, 0]])

    # Request ten bins even though there is only one unique value.
    df = H.edges.size.ashist(bins=10)

    # The bin count should have been reduced to 1.
    assert len(df) == 1

    bin_center = df["bin_center"].iloc[0]
    count = df["value"].iloc[0]
    assert bin_center == 2  # the bin is centred on the unique value
    assert count == 3  # all three edges land in that bin

    # The metadata attributes are still attached in this special case.
    for key in ("xlabel", "ylabel", "title"):
        assert key in df.attrs



### Attribute statistics


Expand Down
21 changes: 19 additions & 2 deletions xgi/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def aspandas(self):
"""
return pd.Series(self._val, name=self.name)


def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
"""Return the distribution of a numpy array.

Expand All @@ -180,7 +181,6 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
Whether to bin the values with log-sized bins.
By default, False.


Returns
-------
Pandas DataFrame
Expand All @@ -189,6 +189,11 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
is True, outputs two additional columns, `bin_lo` and `bin_hi`,
which outputs the left and right bin edges respectively.

The DataFrame includes the following attributes:
- attrs['xlabel']: Label for x-axis
- attrs['ylabel']: 'Count' or 'Probability' based on density parameter
- attrs['title']: Plot title

Notes
-----
Originally from https://github.com/jkbren/networks-and-dataviz
Expand All @@ -199,7 +204,19 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
if isinstance(bins, int) and len(set(self.aslist())) == 1:
bins = 1

return hist(self.asnumpy(), bins, bin_edges, density, log_binning)
# Attach plotting metadata to the histogram before returning it.

# Build the histogram DataFrame
df = hist(self.asnumpy(), bins, bin_edges, density, log_binning)

# Add metadata attributes
df.attrs["xlabel"] = "Value"
df.attrs["ylabel"] = "Probability" if density else "Count"
df.attrs["title"] = "Histogram"

return df



def max(self):
"""The maximum value of this stat."""
Expand Down
Loading