Skip to content

Commit

Permalink
Merge pull request #109 from janpipek/stats_header
Browse files Browse the repository at this point in the history
Stats header + median
  • Loading branch information
janpipek authored Jun 6, 2024
2 parents d656e4c + dfb5733 commit 1b6b7e3
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 16 deletions.
9 changes: 9 additions & 0 deletions HISTORY.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
0.7.2 (?? Jun 2024)
-------------------
- Improved: stats box in plots (optional title, median)

0.7.1 (6 Jun 2024)
------------------
- Fixed: Newer polars compatibility, deal with Nones
- Added: .physt accessors for polars series and dataframes

0.7.0 (31 May 2024)
-------------------
- Added: Support for pola.rs series/dataframes
Expand Down
36 changes: 28 additions & 8 deletions doc/plotting.ipynb

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions src/physt/_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,10 @@ def calculate_1d_frequencies(
raise ValueError(
f"Weights must have the same shape as data, {weights_array.shape} != {data_array.shape}"
)
equal_weights = weights_array.max() - weights_array.min() == 0
else:
weights_array = np.ones_like(data_array, dtype=int)
equal_weights = True

# Prepare dtype
inferred_dtype: np.dtype = np.dtype(dtype or weights_array.dtype)
Expand Down Expand Up @@ -483,6 +485,8 @@ def calculate_1d_frequencies(
min=float(data_array.min()),
max=float(data_array.max()),
weight=float(weights_array.sum()),
# TODO: Support median with weights?
median=np.median(data_array) if equal_weights else np.nan,
)
return frequencies, errors2, underflow, overflow, stats

Expand Down
2 changes: 2 additions & 0 deletions src/physt/plotting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
If True, display a small box with statistical info
stats_loc: 1 | 2 | 3 | 4 | "upper right" | "upper left" | "lower left" | "lower right"
Where to display the stats box (similar to legend)
stats_title:
What is displayed in the stats box header
2D heatmaps
~~~~~~~~~~~
Expand Down
25 changes: 17 additions & 8 deletions src/physt/plotting/matplotlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,7 @@ def _add_stats_box(
h1: Histogram1D,
ax: Axes,
stats: Union[str, bool, Collection[str]] = "all",
title: Optional[str] = None,
loc: Union[int, str, None] = None,
) -> None:
"""Insert a small legend-like box with statistical information.
Expand All @@ -993,6 +994,7 @@ def _add_stats_box(
stats : False | "all" or True | field | list of fields
What info to display
loc: As with legend(loc=...) but we do not support auto
title: Optional header text displayed at the top of the stats box
Note
----
Expand All @@ -1005,6 +1007,7 @@ def _add_stats_box(
"underflow",
"overflow",
"std",
"median",
"total",
]

Expand All @@ -1027,6 +1030,8 @@ def _add_stats_box(
used_stats = list(stats) # type: ignore

text_frags = []
if title:
text_frags.append(title)
if "total" in used_stats:
text_frags.append(f"Total: {h1.total}")
if "underflow" in used_stats:
Expand All @@ -1036,21 +1041,25 @@ def _add_stats_box(
if h1.overflow:
text_frags.append(f"Overflow: {h1.overflow}")
if "mean" in used_stats:
mean = h1.mean()
mean = h1.statistics.mean()
if mean is not None:
text_frags.append(f"Mean: {mean:.2f}")
if "median" in used_stats:
median = h1.statistics.median
if median is not None:
text_frags.append(f"Median: {median:.2f}")
if "std" in used_stats:
std = h1.std()
std = h1.statistics.std()
if std is not None:
text_frags.append(f"Std.dev: {std:.2f}")
if "min" in used_stats:
min = h1.min()
if min is not None:
text_frags.append(f"Min.: {min:.2f}")
min_ = h1.statistics.min
if min_ is not None:
text_frags.append(f"Min.: {min_:.2f}")
if "max" in used_stats:
max = h1.max()
if max is not None:
text_frags.append(f"Max.: {max:.2f}")
max_ = h1.statistics.max
if max_ is not None:
text_frags.append(f"Max.: {max_:.2f}")

text = "\n".join(text_frags)

Expand Down
5 changes: 5 additions & 0 deletions src/physt/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
class Statistics:
"""Container of statistics accumulative data."""

# TODO: Reconsider changing mean, std to properties

sum: float = 0.0
"""Weighted sum of all values entered into histogram."""

Expand All @@ -30,6 +32,8 @@ class Statistics:
weight: float = 0.0
"""The total weight of values used to construct the histogram."""

median: float = np.nan

def mean(self) -> float:
"""Statistical mean of all values entered into histogram (weighted)."""
try:
Expand Down Expand Up @@ -63,6 +67,7 @@ def __add__(self, other: Any) -> Statistics:
min=min(self.min, other.min),
max=max(self.max, other.max),
weight=self.weight + other.weight,
median=np.nan,
)

def __mul__(self, other: Any) -> Statistics:
Expand Down
6 changes: 6 additions & 0 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ def test_weight(self, histogram, use_weights):
else:
assert histogram.statistics.weight == 4

def test_median(self, histogram, use_weights):
    """Check the median stored in the histogram statistics.

    With weights the fixture is expected to carry NaN; without
    weights the expected median is 2.5.
    """
    median = histogram.statistics.median
    if not use_weights:
        assert median == 2.5
        return
    assert np.isnan(median)

def test_mean(self, histogram, use_weights):
if use_weights:
assert histogram.statistics.mean() == 2.8
Expand Down

0 comments on commit 1b6b7e3

Please sign in to comment.