diff --git a/src/pandas_profiling/model/summary_algorithms.py b/src/pandas_profiling/model/summary_algorithms.py
index de09144ad..014ea5a1e 100644
--- a/src/pandas_profiling/model/summary_algorithms.py
+++ b/src/pandas_profiling/model/summary_algorithms.py
@@ -36,6 +36,8 @@ def histogram_compute(
     stats = {}
     bins = config.plot.histogram.bins
     bins_arg = "auto" if bins == 0 else min(bins, n_unique)
+    # Resolve explicit bin edges first; see issue 915 (error for large integers).
+    bins_arg = np.histogram_bin_edges(finite_values, bins=bins_arg)
     stats[name] = np.histogram(finite_values, bins=bins_arg, weights=weights)
 
     max_bins = config.plot.histogram.max_bins
@@ -49,7 +51,9 @@ def chi_square(
     values: Optional[np.ndarray] = None, histogram: Optional[np.ndarray] = None
 ) -> dict:
     if histogram is None:
-        histogram, _ = np.histogram(values, bins="auto")
+        # Resolve explicit bin edges first; see issue 915 (error for large integers).
+        bins = np.histogram_bin_edges(values, bins="auto")
+        histogram, _ = np.histogram(values, bins=bins)
     return dict(chisquare(histogram)._asdict())
 
 
diff --git a/tests/issues/test_issue915.py b/tests/issues/test_issue915.py
new file mode 100644
index 000000000..15ea33146
--- /dev/null
+++ b/tests/issues/test_issue915.py
@@ -0,0 +1,30 @@
+"""
+Test for issue 915:
+https://github.com/ydataai/pandas-profiling/issues/915
+
+Error for series with large integers.
+"""
+import fnmatch
+import pandas as pd
+from pandas_profiling import ProfileReport
+
+def test_issue915():
+    df = pd.DataFrame({"col": pd.Series([716277643516076032 + i for i in range(100)])})
+    df_profile = ProfileReport(df)
+
+    def test_with_value(n_extreme_obs):
+        """Generate HTML and validate the tabs contain the proper tab titles."""
+        df_profile.config.n_extreme_obs = n_extreme_obs
+        df_profile.invalidate_cache()
+
+        reg_min = f"*Minimum {n_extreme_obs} values*"
+        reg_max = f"*Maximum {n_extreme_obs} values*"
+
+        profile_html = df_profile.to_html()
+
+        assert fnmatch.fnmatch(profile_html, reg_min)
+        assert fnmatch.fnmatch(profile_html, reg_max)
+
+    test_with_value(5)
+    test_with_value(100)
+    test_with_value(120)