Skip to content

Commit

Permalink
respect order of categorical vars. ref #93
Browse files Browse the repository at this point in the history
  • Loading branch information
tompollard committed May 7, 2020
1 parent dc831f5 commit d26c91c
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 3 deletions.
20 changes: 18 additions & 2 deletions tableone/tableone.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,10 +245,26 @@ def __init__(self, data, columns=None, categorical=None, groupby=None,
warnings.warn(msg)
pval_adjust = "bonferroni"

# ensure that values to order are strings
# if custom order is provided, ensure that values are strings
if order:
order = {k: ["{}".format(v) for v in order[k]] for k in order}

# if input df has ordered categorical variables, get the order.
order_cats = [x for x in data.select_dtypes("category")
if data[x].dtype.ordered]
if any(order_cats):
d_order_cats = {v: data[v].cat.categories for v in order_cats}
d_order_cats = {k: ["{}".format(v) for v in d_order_cats[k]]
for k in d_order_cats}

# combine the orders. custom order takes precedence.
if order_cats and order:
new = {**order, **d_order_cats}
for k in order:
order[k] = ["{}".format(v) for v in order[k]]
new[k] = order[k] + [x for x in new[k] if x not in order[k]]
order = new
elif order_cats:
order = d_order_cats

if pval and not groupby:
raise InputError("If pval=True then groupby must be specified.")
Expand Down
46 changes: 45 additions & 1 deletion test_tableone.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import warnings

from nose.tools import (with_setup, assert_raises, assert_equal,
assert_almost_equal)
assert_almost_equal, assert_list_equal)
import numpy as np
import pandas as pd
from scipy import stats
Expand Down Expand Up @@ -1011,3 +1011,47 @@ def test_compute_standardized_mean_difference_categorical(self):
for k in exp_smd:
smd = t.tableone.loc[k, 'Grouped by MechVent']['SMD (0,1)'][0]
assert_equal(smd, exp_smd[k])

@with_setup(setup, teardown)
def test_order_of_order_categorical_columns(self):
    """
    Check how category ordering is carried into the table.

    Ordered categorical columns should keep their declared category
    order when no custom order is given. When a custom order is passed,
    the listed values come first and the remaining categories follow.
    """
    def check_order(table, expected, variables):
        # verify both the stored order and the rendered row order
        for var in variables:
            assert_list_equal(table._order[var], expected[var])
            assert_list_equal(table.tableone.loc[var].index.to_list(),
                              expected[var])

    # "day" and "month" are ordered categoricals; "alph" is unordered
    data = pd.DataFrame({"A": ["a", "b", "c", "a"]})
    data["day"] = pd.Categorical(
        ["mon", "wed", "tue", "thu"],
        categories=["wed", "thu", "mon", "tue"],
        ordered=True,
    )
    data["alph"] = pd.Categorical(
        ["a", "b", "c", "a"],
        categories=["b", "c", "d", "a"],
        ordered=False,
    )
    data["month"] = pd.Categorical(
        ["jan", "feb", "mar", "apr"],
        categories=["feb", "jan", "mar", "apr"],
        ordered=True,
    )

    order = {"month": ["jan"], "day": ["mon", "tue", "wed"]}

    # without a custom order, the categorical order defined above applies
    t1 = TableOne(data, label_suffix=False)
    t1_expected_order = {
        'month': ["feb", "jan", "mar", "apr"],
        'day': ["wed", "thu", "mon", "tue"],
    }
    check_order(t1, t1_expected_order, order)

    # with a custom order, the requested values take precedence
    t2 = TableOne(data, order=order, label_suffix=False)
    t2_expected_order = {
        'month': ["jan", "feb", "mar", "apr"],
        'day': ["mon", "tue", "wed", "thu"],
    }
    check_order(t2, t2_expected_order, order)

0 comments on commit d26c91c

Please sign in to comment.