From 7164198acf794312abff87048267f721f6ff2314 Mon Sep 17 00:00:00 2001 From: Niall Keleher Date: Thu, 25 Jan 2024 07:00:48 -0800 Subject: [PATCH 1/2] rename to statsframe --- CONTRIBUTING.md | 2 +- Makefile | 8 +++--- README.md | 28 +++++++++---------- docs/_quarto.yml | 6 ++-- examples/example_datasummary_skim.py | 2 +- poetry.lock | 3 +- pyproject.toml | 10 +++---- src/{pydatasummary => statsframe}/__init__.py | 6 ++-- .../_databackend.py | 0 .../_tbl_data.py | 0 src/{pydatasummary => statsframe}/ds.py | 0 tests/test_skim.py | 2 +- 12 files changed, 34 insertions(+), 33 deletions(-) rename src/{pydatasummary => statsframe}/__init__.py (70%) rename src/{pydatasummary => statsframe}/_databackend.py (100%) rename src/{pydatasummary => statsframe}/_tbl_data.py (100%) rename src/{pydatasummary => statsframe}/ds.py (100%) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index db8c882..8a9b649 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,6 +1,6 @@ # Contributing -Contributions to `pydatasummary` are welcome and encouraged! The goal of this +Contributions to `statsframe` are welcome and encouraged! The goal of this project is to make summarizing data and statistical models in python easier and more intuitive. If you have an idea for a new feature, or a bug fix, please make a suggestion. Every contribution is appreciated and will be considered. 
diff --git a/Makefile b/Makefile index 078e01e..9f6dce4 100644 --- a/Makefile +++ b/Makefile @@ -7,11 +7,11 @@ install-dev: style: poetry run pre-commit run --hook-stage manual --all-files -pytest-cov: - poetry run pytest --cov-report term --cov=pydatasummary tests/ +pytest-cov: install-dev + poetry run pytest --cov-report term --cov=statsframe tests/ -build: pytest-cov +build: poetry build -publish: build +publish: style build poetry publish diff --git a/README.md b/README.md index 68105cf..41a89bc 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,35 @@ -# pydatasummary +# statsframe Customizable data and model summaries in Python. -`pydatasummary` creates tables that provide descriptive statistics of +`statsframe` creates tables that provide descriptive statistics of numeric and categorical data. The goal is to provide a simple -- yet customizable -- way to summarize data and models in Python. -`pydatasummary` is heavily inspired by [`modelsummary`](https://modelsummary.com/) +`statsframe` is heavily inspired by [`modelsummary`](https://modelsummary.com/) in R. The goal is not to replicate all that `modelsummary` does, but to provide a way of achieving similar results in Python. -In order to achieve this, `pydatasummary` builds on the [`polars`](https://docs.pola.rs/) +In order to achieve this, `statsframe` builds on the [`polars`](https://docs.pola.rs/) library to produce tables that can be easily customized and exported to other formats. ## Basic Usage -As an example of `pydatasummary` usage, the `skim` function provides a +As an example of `statsframe` usage, the `skim` function provides a summary of a DataFrame (either `polars.DataFrame` or `pandas.DataFrame`). -The default summary statistics returned by `pydatasummary.skim()` are unique values, +The default summary statistics returned by `statsframe.skim()` are unique values, percentage missing, mean, standard deviation, minimum, median, and maximum. 
-Where possible, `pydatasummary` will print a table to the console and return a +Where possible, `statsframe` will print a table to the console and return a polars DataFrame with the summary statistics. This allows for easy customization. -For example, the `polars.DataFrame` with statistics from `pydatasummary` can be +For example, the `polars.DataFrame` with statistics from `statsframe` can be modified using the [`Great Tables`](https://posit-dev.github.io/great-tables/reference/) package. ```python import polars as pl -import pydatasummary as ds +import statsframe as ds df = ( pl.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv") @@ -61,7 +61,7 @@ We can achieve the same result above with a pandas DataFrame. ```python import pandas as pd -import pydatasummary as ds +import statsframe as ds trees_df = pd.read_csv( "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/trees.csv" @@ -84,19 +84,19 @@ Rows: 31, Columns: 3 ## Contributing If you encounter a bug, have usage questions, or want to share ideas to make -the `pydatasummary` package more useful, please feel free to file an -[issue](https://github.com/NKeleher/pydatasummary/issues). +the `statsframe` package more useful, please feel free to file an +[issue](https://github.com/NKeleher/statsframe/issues). ## Code of Conduct -Please note that the **pydatasummary** project is released with a +Please note that the **statsframe** project is released with a [contributor code of conduct](https://www.contributor-covenant.org/version/2/1/code_of_conduct/). By participating in this project you agree to abide by its terms. ## License -**pydatasummary** is licensed under the MIT license. +**statsframe** is licensed under the MIT license. ## Governance diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 98ebba7..ba4e883 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -3,7 +3,7 @@ project: type: website output-dir: . 
# website: -# title: "pydatasummary" +# title: "statsframe" # favicon: favicon.ico # twitter-card: true # navbar: @@ -27,12 +27,12 @@ project: # - _sidebar.yml # quartodoc: -# package: pydatasummary +# package: statsframe # parser: google # sidebar: _sidebar.yml # sections: # - title: "Function reference" -# desc: "What pydatasummary's functions do" +# desc: "What statsframe's functions do" # contents: # - skim diff --git a/examples/example_datasummary_skim.py b/examples/example_datasummary_skim.py index 2dfe85e..9cfd720 100644 --- a/examples/example_datasummary_skim.py +++ b/examples/example_datasummary_skim.py @@ -2,7 +2,7 @@ import pandas as pd import polars as pl -import pydatasummary as ds +import statsframe as ds # %% df = pl.read_csv( diff --git a/poetry.lock b/poetry.lock index 75e402e..c141929 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2015,6 +2015,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -2714,4 +2715,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", 
"more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "7c4bbf941fa8aa847a8faae068b4e25b2df1a9ae0c360b8d261e68171a2992b7" +content-hash = "e6306b8828478a388d4373d51d79883f75ca1dde27537b5e993d4ab5d01cf0fa" diff --git a/pyproject.toml b/pyproject.toml index 40d7390..8da1f90 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,13 @@ [tool.poetry] -name = "pydatasummary" -version = "0.0.2" +name = "statsframe" +version = "0.0.1" description = "Customizable data and model summaries in Python." authors = ["Niall Keleher "] packages = [{ include = "*", from = "src" }] license = "MIT" readme = "README.md" -homepage = "https://github.com/NKeleher/pydatasummary#readme" -repository = "https://github.com/NKeleher/pydatasummary" +homepage = "https://github.com/NKeleher/statsframe#readme" +repository = "https://github.com/NKeleher/statsframe" keywords = ["tables", "statistics", "econometrics"] classifiers = [ # https://pypi.org/classifiers/ @@ -29,6 +29,7 @@ classifiers = [ python = "^3.9" polars = "^0.20.5" pandas = "^2.1.4" +importlib-metadata = "^7.0.1" [tool.poetry.group.dev.dependencies] pytest = "^7.4.4" @@ -40,7 +41,6 @@ bandit = "^1.7.6" docformatter = "^1.7.5" mypy = "^1.8.0" jupyterlab = "^4.0.11" -importlib-metadata = "^7.0.1" [build-system] requires = ["poetry-core"] diff --git a/src/pydatasummary/__init__.py b/src/statsframe/__init__.py similarity index 70% rename from src/pydatasummary/__init__.py rename to src/statsframe/__init__.py index 432d05e..9f7a0e2 100644 --- a/src/pydatasummary/__init__.py +++ b/src/statsframe/__init__.py @@ -1,12 +1,12 @@ -# Define pydatasummary version +# Define statsframe version from importlib_metadata import version as _v # __version__ = "0.0.1" -__version__ = _v("pydatasummary") +__version__ = _v("statsframe") del _v -# Import pydatasummary objects +# Import statsframe objects # from ._tbl_data import * # noqa: F401, F403, E402 # from ._databackend import * # noqa: F401, F403, E402 from 
.ds import * # noqa: F401, F403, E402 diff --git a/src/pydatasummary/_databackend.py b/src/statsframe/_databackend.py similarity index 100% rename from src/pydatasummary/_databackend.py rename to src/statsframe/_databackend.py diff --git a/src/pydatasummary/_tbl_data.py b/src/statsframe/_tbl_data.py similarity index 100% rename from src/pydatasummary/_tbl_data.py rename to src/statsframe/_tbl_data.py diff --git a/src/pydatasummary/ds.py b/src/statsframe/ds.py similarity index 100% rename from src/pydatasummary/ds.py rename to src/statsframe/ds.py diff --git a/tests/test_skim.py b/tests/test_skim.py index c2bf66f..dc3ff46 100644 --- a/tests/test_skim.py +++ b/tests/test_skim.py @@ -1,7 +1,7 @@ import polars as pl from polars.testing import assert_frame_equal -from pydatasummary.ds import skim +from statsframe.ds import skim df = pl.DataFrame( { From 4c4d0ed50ee378248ff0bf571b837cac81f352f8 Mon Sep 17 00:00:00 2001 From: Niall Keleher Date: Thu, 25 Jan 2024 07:23:39 -0800 Subject: [PATCH 2/2] import statsframe as sf --- README.md | 8 ++++---- examples/example_datasummary_skim.py | 14 +++++++------- pyproject.toml | 2 +- src/statsframe/__init__.py | 2 +- src/statsframe/{ds.py => datasummary.py} | 2 +- tests/test_skim.py | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) rename src/statsframe/{ds.py => datasummary.py} (99%) diff --git a/README.md b/README.md index 41a89bc..328c6c2 100644 --- a/README.md +++ b/README.md @@ -29,14 +29,14 @@ modified using the [`Great Tables`](https://posit-dev.github.io/great-tables/ref ```python import polars as pl -import statsframe as ds +import statsframe as sf df = ( pl.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv") .drop("rownames") ) -stats = ds.skim(df) +stats = sf.skim(df) Summary Statistics Rows: 32, Columns: 11 @@ -61,7 +61,7 @@ We can achieve the same result above with a pandas DataFrame. 
```python import pandas as pd -import statsframe as ds +import statsframe as sf trees_df = pd.read_csv( "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/trees.csv" ).drop(columns=["rownames"]) -trees_stats = ds.skim(trees_df) +trees_stats = sf.skim(trees_df) Summary Statistics Rows: 31, Columns: 3 diff --git a/examples/example_datasummary_skim.py b/examples/example_datasummary_skim.py index 9cfd720..2b4afdc 100644 --- a/examples/example_datasummary_skim.py +++ b/examples/example_datasummary_skim.py @@ -2,14 +2,14 @@ import pandas as pd import polars as pl -import statsframe as ds +import statsframe as sf # %% df = pl.read_csv( "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/mtcars.csv" ).drop("rownames") -stats = ds.skim(df) +stats = sf.skim(df) # %% [markdown] # Import a csv file to a polars DataFrame: @@ -26,7 +26,7 @@ # Create a skim table # %% -penguins_stats = ds.skim(penguins_df) +penguins_stats = sf.skim(penguins_df) # %% [markdown] # Return the polars DataFrame with the summary statistics @@ -35,7 +35,7 @@ penguins_stats # %% -ds.skim( +sf.skim( penguins_df, output="markdown", title="Palmer's Penguins Summary Statistics", @@ -43,7 +43,7 @@ ) # %% -ds.skim( +sf.skim( penguins_df, stats="moments", output="markdown", @@ -52,7 +52,7 @@ ) # %% -ds.skim( +sf.skim( penguins_df, stats="full", output="markdown", @@ -72,6 +72,6 @@ trees_df.info() # %% -trees_stats = ds.skim(trees_df) +trees_stats = sf.skim(trees_df) # %% diff --git a/pyproject.toml b/pyproject.toml index 8da1f90..c4c2e78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "statsframe" -version = "0.0.1" +version = "0.0.2" description = "Customizable data and model summaries in Python." 
authors = ["Niall Keleher "] packages = [{ include = "*", from = "src" }] diff --git a/src/statsframe/__init__.py b/src/statsframe/__init__.py index 9f7a0e2..c411ed8 100644 --- a/src/statsframe/__init__.py +++ b/src/statsframe/__init__.py @@ -9,4 +9,4 @@ # Import statsframe objects # from ._tbl_data import * # noqa: F401, F403, E402 # from ._databackend import * # noqa: F401, F403, E402 -from .ds import * # noqa: F401, F403, E402 +from .datasummary import * # noqa: F401, F403, E402 diff --git a/src/statsframe/ds.py b/src/statsframe/datasummary.py similarity index 99% rename from src/statsframe/ds.py rename to src/statsframe/datasummary.py index 81aa75c..7785730 100644 --- a/src/statsframe/ds.py +++ b/src/statsframe/datasummary.py @@ -1,6 +1,6 @@ from __future__ import annotations -# Main ds imports ---- +# Main sf imports ---- import pandas as pd import polars as pl import polars.selectors as cs diff --git a/tests/test_skim.py b/tests/test_skim.py index dc3ff46..af8ecea 100644 --- a/tests/test_skim.py +++ b/tests/test_skim.py @@ -1,7 +1,7 @@ import polars as pl from polars.testing import assert_frame_equal -from statsframe.ds import skim +import statsframe as sf df = pl.DataFrame( { @@ -28,7 +28,7 @@ def test_skim_numeric_df(data=df): # Act - result = skim(data) + result = sf.skim(data) # Assert assert_frame_equal(result, expected_df)