Skip to content

Commit

Permalink
Add metadata attributes to ashist() for improved plotting (#635)
Browse files Browse the repository at this point in the history
* test: Add simple benchmark test

* test: Add pedantic benchmark test to understand workflow

* Attempting to change results from data.js

* Reverted benchmark.yml back to normal

* Added pull_request to benchmark.yml workflow to understand what happens when a pull request is made

* Added two branches for each fix and added ashist function to add-metadata-attributes branch

* Added unit tests to the add-metadata-attributes branch

* Changed benchmark.yml and core.py back to normal
  • Loading branch information
willcollins10 authored Dec 10, 2024
1 parent ade37fb commit e4b3c3b
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 3 deletions.
2 changes: 1 addition & 1 deletion benchmarks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,4 @@ def setup():
def dual(H):
H.dual()

benchmark.pedantic(dual, setup=setup, rounds=rounds)
benchmark.pedantic(dual, setup=setup, rounds=rounds)
62 changes: 62 additions & 0 deletions tests/stats/test_core_stats_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,68 @@ def test_issue_468():
assert H.edges.size.ashist().equals(df)




def test_ashist_attrs_exist():
    """Check that the DataFrame returned by ashist carries plot metadata."""
    hypergraph = xgi.sunflower(3, 1, 20)
    hist_df = hypergraph.edges.size.ashist()

    # Each plotting label must be present as a key in the DataFrame's attrs.
    for key in ("xlabel", "ylabel", "title"):
        assert key in hist_df.attrs


def test_ashist_density_labels():
    """Check that attrs['ylabel'] follows the ``density`` argument."""
    hypergraph = xgi.sunflower(3, 1, 20)

    # density=False is labeled 'Count'; density=True is labeled 'Probability'.
    cases = ((False, "Count"), (True, "Probability"))
    for density, expected_label in cases:
        hist_df = hypergraph.edges.size.ashist(density=density)
        assert hist_df.attrs["ylabel"] == expected_label


def test_ashist_original_functionality():
    """Check that the histogram data itself is unaffected by the attrs."""
    hypergraph = xgi.sunflower(3, 1, 20)
    hist_df = hypergraph.edges.size.ashist()

    # Same expectation as the pre-existing histogram test: a single row
    # with bin center 20.0 and value 3.
    rows = [[20.0, 3]]
    column_names = ["bin_center", "value"]
    expected = pd.DataFrame(rows, columns=column_names)
    assert hist_df.equals(expected)

    # The metadata must be present alongside the unchanged data.
    assert "xlabel" in hist_df.attrs



def test_ashist_single_unique_value():
    """Check ashist when every value is identical but several bins are requested."""
    hypergraph = xgi.Hypergraph()
    hypergraph.add_nodes_from(range(5))
    # Three edges, each of size 2, so the size statistic has one unique value.
    same_size_edges = [[0, 1], [2, 3], [4, 0]]
    hypergraph.add_edges_from(same_size_edges)

    hist_df = hypergraph.edges.size.ashist(bins=10)

    # With a single unique value the requested 10 bins collapse to one.
    assert len(hist_df) == 1
    # That one bin is centered on the unique value ...
    assert hist_df["bin_center"].iloc[0] == 2
    # ... and counts all three edges.
    assert hist_df["value"].iloc[0] == 3

    # The plotting metadata must be attached in this case as well.
    for key in ("xlabel", "ylabel", "title"):
        assert key in hist_df.attrs



### Attribute statistics


Expand Down
21 changes: 19 additions & 2 deletions xgi/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def aspandas(self):
"""
return pd.Series(self._val, name=self.name)


def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
"""Return the distribution of a numpy array.
Expand All @@ -180,7 +181,6 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
Whether to bin the values with log-sized bins.
By default, False.
Returns
-------
Pandas DataFrame
Expand All @@ -189,6 +189,11 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
is True, outputs two additional columns, `bin_lo` and `bin_hi`,
which outputs the left and right bin edges respectively.
The DataFrame includes the following attributes:
- attrs['xlabel']: Label for x-axis
- attrs['ylabel']: 'Count' or 'Probability' based on density parameter
- attrs['title']: Plot title
Notes
-----
Originally from https://github.com/jkbren/networks-and-dataviz
Expand All @@ -199,7 +204,19 @@ def ashist(self, bins=10, bin_edges=False, density=False, log_binning=False):
if isinstance(bins, int) and len(set(self.aslist())) == 1:
bins = 1

return hist(self.asnumpy(), bins, bin_edges, density, log_binning)
# Build the histogram, then attach plotting metadata before returning it

# Get the histogram DataFrame
df = hist(self.asnumpy(), bins, bin_edges, density, log_binning)

# Add metadata attributes
df.attrs["xlabel"] = "Value"
df.attrs["ylabel"] = "Probability" if density else "Count"
df.attrs["title"] = "Histogram"

return df



def max(self):
"""The maximum value of this stat."""
Expand Down

0 comments on commit e4b3c3b

Please sign in to comment.