Skip to content

Commit

Permalink
Issue#915: Fixed the Error for large integers in Series
Browse files Browse the repository at this point in the history
  • Loading branch information
Sohaib90 committed Jan 20, 2023
1 parent ea29771 commit 4486a2c
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/pandas_profiling/model/summary_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def histogram_compute(
stats = {}
bins = config.plot.histogram.bins
bins_arg = "auto" if bins == 0 else min(bins, n_unique)
bins_arg = np.histogram_bin_edges(finite_values, bins=bins_arg)
stats[name] = np.histogram(finite_values, bins=bins_arg, weights=weights)

max_bins = config.plot.histogram.max_bins
def chi_square(
    values: Optional[np.ndarray] = None, histogram: Optional[np.ndarray] = None
) -> dict:
    """Run a chi-square test on a histogram and return the result as a dict.

    Args:
        values: Raw observations. Only used when ``histogram`` is not given,
            in which case they are binned with NumPy's ``"auto"`` strategy.
        histogram: Pre-computed bin counts. When provided, ``values`` is
            ignored and the counts are tested as-is.

    Returns:
        The fields of the ``scipy.stats.chisquare`` result converted to a
        plain ``dict``.
    """
    if histogram is None:
        # Resolve the "auto" bin edges explicitly and pass them to
        # np.histogram instead of letting np.histogram derive them itself.
        # This two-step form is the fix for issue 915 (error on series with
        # very large integers).
        bin_edges = np.histogram_bin_edges(values, bins="auto")
        histogram, _ = np.histogram(values, bins=bin_edges)
    return dict(chisquare(histogram)._asdict())


Expand Down
30 changes: 30 additions & 0 deletions tests/issues/test_issue915.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
Test for issue 915:
https://github.com/ydataai/pandas-profiling/issues/915
Error for series with large integers.
"""
import fnmatch
import pandas as pd
from pandas_profiling import ProfileReport

def test_issue915():
    """Profile a column of very large integers and check the report renders.

    Regression test for issue 915: builds a 100-row frame whose values are
    around 7.16e17 and verifies the HTML report contains the
    "Minimum N values" / "Maximum N values" tab titles for several settings
    of ``n_extreme_obs``.
    """
    large_ints = pd.Series([716277643516076032 + i for i in range(100)])
    df = pd.DataFrame({"col": large_ints})
    df_profile = ProfileReport(df)

    def check_tab_titles(n_extreme_obs):
        """Render the HTML with the given setting and assert both tab titles appear."""
        reg_min = f"*<a href=* aria-controls=* role=tab data-toggle=tab>Minimum {n_extreme_obs} values</a>*"
        reg_max = f"*<a href=* aria-controls=* role=tab data-toggle=tab>Maximum {n_extreme_obs} values</a>*"

        # Change the setting, then drop cached results so the report is
        # regenerated with it.
        df_profile.config.n_extreme_obs = n_extreme_obs
        df_profile.invalidate_cache()
        profile_html = df_profile.to_html()

        for pattern in (reg_min, reg_max):
            assert fnmatch.fnmatch(profile_html, pattern)

    # Below, equal to, and above the number of rows in the frame.
    for n_extreme_obs in (5, 100, 120):
        check_tab_titles(n_extreme_obs)

0 comments on commit 4486a2c

Please sign in to comment.