From 10197936a5c7de23f0df79de15b8bcb6e419b316 Mon Sep 17 00:00:00 2001 From: will collins Date: Mon, 2 Dec 2024 21:47:14 -0500 Subject: [PATCH 1/8] test: Add simple benchmark test --- benchmarks/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/core.py b/benchmarks/core.py index 062c2533..7cb4d2a8 100644 --- a/benchmarks/core.py +++ b/benchmarks/core.py @@ -3,7 +3,13 @@ rounds = 10 fname = "benchmarks/email-enron.json" - +# Below is an example so I can better understand the control flow +def test_simple_benchmark(): + """A simple benchmark to test the workflow.""" + x = 0 + for i in range(1000): + x += i + return x def test_construct_from_edgelist(benchmark): def setup(): H = xgi.read_hif(fname) From cd5fdd17a75afbf192847291180e52954557fdc2 Mon Sep 17 00:00:00 2001 From: will collins Date: Mon, 2 Dec 2024 22:15:08 -0500 Subject: [PATCH 2/8] test: Add pedantic benchmark test to understand workflow --- benchmarks/core.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/benchmarks/core.py b/benchmarks/core.py index 062c2533..e1d28fb9 100644 --- a/benchmarks/core.py +++ b/benchmarks/core.py @@ -4,6 +4,21 @@ fname = "benchmarks/email-enron.json" +# Below is an example so I can better understand the control flow +def test_simple_benchmark(benchmark): + """A simple benchmark to test the workflow.""" + def setup(): + # No setup needed for this simple test + return (), {} + + def run_benchmark(): + x = 0 + for i in range(1000): + x += i + return x + + benchmark.pedantic(run_benchmark, setup=setup, rounds=rounds) + def test_construct_from_edgelist(benchmark): def setup(): H = xgi.read_hif(fname) From 5df4b753a210db7608b5110c40ed3e1401a41e9f Mon Sep 17 00:00:00 2001 From: will collins Date: Mon, 2 Dec 2024 22:54:28 -0500 Subject: [PATCH 3/8] Attempting to change results from data.js --- .github/workflows/benchmark.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 64e4c433..a18a51cc 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -37,4 +37,7 @@ jobs: # Show alert with commit comment on detecting possible performance regression alert-threshold: '200%' comment-on-alert: false - fail-on-alert: true \ No newline at end of file + fail-on-alert: true + output-metric: 'mean' + metric-unit: 'ms' + metric-scale: '1000' \ No newline at end of file From b5261b26c01fd8f6967b2f8650a42a4e773b2def Mon Sep 17 00:00:00 2001 From: will collins Date: Tue, 3 Dec 2024 00:02:11 -0500 Subject: [PATCH 4/8] Reverted benchmarks.yml back to normal --- .github/workflows/benchmark.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index a18a51cc..0e139006 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -38,6 +38,3 @@ jobs: alert-threshold: '200%' comment-on-alert: false fail-on-alert: true - output-metric: 'mean' - metric-unit: 'ms' - metric-scale: '1000' \ No newline at end of file From 2b77a7c19ce8dfe1254da9f5ed7cf18f89c41b9f Mon Sep 17 00:00:00 2001 From: will collins Date: Tue, 3 Dec 2024 11:46:11 -0500 Subject: [PATCH 5/8] Added pull_request to benchmark.yml workflow to understand what happens when a pull request is made --- .github/workflows/benchmark.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 0e139006..6b0ed8b9 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -3,6 +3,9 @@ on: push: branches: - main + pull_request: + branches: + - main workflow_dispatch: permissions: From 19eff7d45c2e6854479299c6772789d331cb4c71 Mon Sep 17 00:00:00 2001 From: will collins Date: Sun, 8 Dec 2024 14:36:09 -0500 Subject: [PATCH 6/8] Added two branches for each fix and added ashist function to add-metadata-attributes branch --- xgi/stats/__init__.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/xgi/stats/__init__.py b/xgi/stats/__init__.py index 5e0e0648..bc82dd84 100644 --- a/xgi/stats/__init__.py +++ b/xgi/stats/__init__.py @@ -162,6 +162,7 @@ def aspandas(self): """ return pd.Series(self._val, name=self.name) + def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): """Return the distribution of a numpy array. @@ -180,7 +181,6 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): Whether to bin the values with log-sized bins. By default, False. - Returns ------- Pandas DataFrame @@ -189,6 +189,11 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): is True, outputs two additional columns, `bin_lo` and `bin_hi`, which outputs the left and right bin edges respectively. + The DataFrame includes the following attributes: + - attrs['xlabel']: Label for x-axis + - attrs['ylabel']: 'Count' or 'Probability' based on density parameter + - attrs['title']: Plot title + Notes ----- Originally from https://github.com/jkbren/networks-and-dataviz @@ -199,7 +204,19 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False): if isinstance(bins, int) and len(set(self.aslist())) == 1: bins = 1 - return hist(self.asnumpy(), bins, bin_edges, density, log_binning) + # My modifications below + + # Get the histogram Dataframe + df = hist(self.asnumpy(), bins, bin_edges, density, log_binning) + + # Add metadata attributes + df.attrs["xlabel"] = "Value" + df.attrs["ylabel"] = "Probability" if density else "Count" + df.attrs["title"] = "Histogram" + + return df + + def max(self): """The maximum value of this stat.""" From b62d456e9520dad7d7beefe61321c13ec6945884 Mon Sep 17 00:00:00 2001 From: will collins Date: Sun, 8 Dec 2024 14:39:37 -0500 Subject: [PATCH 7/8] Added unittests to add-metadata-attributes --- tests/stats/test_core_stats_functions.py | 62 ++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/stats/test_core_stats_functions.py b/tests/stats/test_core_stats_functions.py index a5581f77..f0c5a36d 100644 --- a/tests/stats/test_core_stats_functions.py +++ b/tests/stats/test_core_stats_functions.py @@ -575,6 +575,68 @@ def test_issue_468(): assert H.edges.size.ashist().equals(df) + + +def test_ashist_attrs_exist(): + """Test that ashist returns DataFrame with expected attributes.""" + H = xgi.sunflower(3, 1, 20) + df = H.edges.size.ashist() + + # Check that all expected attributes exist + assert 'xlabel' in df.attrs + assert 'ylabel' in df.attrs + assert 'title' in df.attrs + + +def test_ashist_density_labels(): + """Test that ylabel changes based on density parameter.""" + H = xgi.sunflower(3, 1, 20) + + # Test default (density=False) + df_count = H.edges.size.ashist(density=False) + assert df_count.attrs['ylabel'] == 'Count' + + # Test with density=True + df_density = H.edges.size.ashist(density=True) + assert df_density.attrs['ylabel'] == 'Probability' + + +def test_ashist_original_functionality(): + """Test that adding attributes doesn't break original functionality.""" + H = xgi.sunflower(3, 1, 20) + df = H.edges.size.ashist() + + # Original test case should still pass + expected_df = pd.DataFrame([[20.0, 3]], columns=["bin_center", "value"]) + assert df.equals(expected_df) # Original functionality + + # And should have attributes + assert 'xlabel' in df.attrs + + + +def test_ashist_single_unique_value(): + """Test ashist when there is only one unique value and multiple bins.""" + H = xgi.Hypergraph() + H.add_nodes_from(range(5)) + # All edges have the same size + H.add_edges_from([[0, 1], [2, 3], [4, 0]]) + + # The edge sizes will all be 2 + df = H.edges.size.ashist(bins=10) + + # Since there's only one unique value, bins should be set to 1 + assert len(df) == 1 # Only one bin should be present + assert df['bin_center'].iloc[0] == 2 # The bin center should be the unique value + assert df['value'].iloc[0] == 3 # There are three edges of size 2 + + # Check that attributes are present + assert 'xlabel' in df.attrs + assert 'ylabel' in df.attrs + assert 'title' in df.attrs + + + ### Attribute statistics From 76a0455914f36375b6c5da51800b897ba87c8201 Mon Sep 17 00:00:00 2001 From: will collins Date: Sun, 8 Dec 2024 15:13:51 -0500 Subject: [PATCH 8/8] Changed benhmark.yml and core.py back to normal --- .github/workflows/benchmark.yml | 5 +---- benchmarks/core.py | 17 +---------------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 6b0ed8b9..64e4c433 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -3,9 +3,6 @@ on: push: branches: - main - pull_request: - branches: - - main workflow_dispatch: permissions: @@ -40,4 +37,4 @@ jobs: # Show alert with commit comment on detecting possible performance regression alert-threshold: '200%' comment-on-alert: false - fail-on-alert: true + fail-on-alert: true \ No newline at end of file diff --git a/benchmarks/core.py b/benchmarks/core.py index e1d28fb9..2aca9eb8 100644 --- a/benchmarks/core.py +++ b/benchmarks/core.py @@ -4,21 +4,6 @@ fname = "benchmarks/email-enron.json" -# Below is an example so I can better understand the control flow -def test_simple_benchmark(benchmark): - """A simple benchmark to test the workflow.""" - def setup(): - # No setup needed for this simple test - return (), {} - - def run_benchmark(): - x = 0 - for i in range(1000): - x += i - return x - - benchmark.pedantic(run_benchmark, setup=setup, rounds=rounds) - def test_construct_from_edgelist(benchmark): def setup(): H = xgi.read_hif(fname) @@ -172,4 +157,4 @@ def setup(): def dual(H): H.dual() - benchmark.pedantic(dual, setup=setup, rounds=rounds) + benchmark.pedantic(dual, setup=setup, rounds=rounds) \ No newline at end of file