Skip to content

Commit

Permalink
Merge pull request #415 from PaulWestenthanner/fix/issue_414_412
Browse files Browse the repository at this point in the history
Fix/issue 414 412
  • Loading branch information
PaulWestenthanner committed Aug 15, 2023
2 parents eeff8e3 + 6f412cc commit d73479e
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 12 deletions.
2 changes: 1 addition & 1 deletion category_encoders/backward_difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class BackwardDifferenceEncoder(BaseContrastEncoder):
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
"""

Expand Down
2 changes: 1 addition & 1 deletion category_encoders/base_contrast_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class BaseContrastEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin):
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
"""
prefit_ordinal = True
Expand Down
4 changes: 2 additions & 2 deletions category_encoders/helmert.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ class HelmertEncoder(BaseContrastEncoder):
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
"""
def get_contrast_matrix(self, values_to_encode: np.ndarray) -> ContrastMatrix:
    """Build the Helmert contrast matrix for the given category values.

    Parameters
    ----------
    values_to_encode : np.ndarray
        The category levels to encode; passed unchanged to
        ``Helmert().code_without_intercept``.

    Returns
    -------
    ContrastMatrix
        Whatever ``Helmert().code_without_intercept`` returns for these
        levels (the coding variant without an intercept column).
    """
    # ``np.array`` is a factory function, not a type, so the parameter is
    # annotated with ``np.ndarray`` instead.
    helmert_coding = Helmert()
    return helmert_coding.code_without_intercept(values_to_encode)
4 changes: 2 additions & 2 deletions category_encoders/one_hot.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ class OneHotEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin):
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
"""
prefit_ordinal = True
encoding_relation = util.EncodingRelation.ONE_TO_N_UNIQUE
Expand Down
5 changes: 2 additions & 3 deletions category_encoders/ordinal.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@ class OrdinalEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin):
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
"""
prefit_ordinal = False
encoding_relation = util.EncodingRelation.ONE_TO_ONE
Expand Down Expand Up @@ -225,7 +224,7 @@ def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', hand
if pd.isna(categories).any():
categories = [c for c in categories if not pd.isna(c)] + [nan_identity]
else:
categories = categories.tolist()
categories = list(categories)
if util.is_category(X[col].dtype):
# Avoid using pandas category dtype meta-data if possible, see #235, #238.
if X[col].dtype.ordered:
Expand Down
4 changes: 2 additions & 2 deletions category_encoders/polynomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ class PolynomialEncoder(BaseContrastEncoder):
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
"""
def get_contrast_matrix(self, values_to_encode: np.ndarray) -> ContrastMatrix:
    """Build the polynomial contrast matrix for the given category values.

    Parameters
    ----------
    values_to_encode : np.ndarray
        The category levels to encode; passed unchanged to
        ``Poly().code_without_intercept``.

    Returns
    -------
    ContrastMatrix
        Whatever ``Poly().code_without_intercept`` returns for these
        levels (the coding variant without an intercept column).
    """
    # ``np.array`` is a factory function, not a type, so the parameter is
    # annotated with ``np.ndarray`` instead.
    polynomial_coding = Poly()
    return polynomial_coding.code_without_intercept(values_to_encode)
2 changes: 1 addition & 1 deletion category_encoders/sum_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class SumEncoder(BaseContrastEncoder):
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
"""

Expand Down
27 changes: 27 additions & 0 deletions tests/test_ordinal.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,33 @@ def test_HaveNaNInTrain_ExpectCodedAsOne(self):

self.assertEqual(expected, result)

def test_Timestamp(self):
    """Ordinal-encode a column of ``pd.Timestamp`` values.

    Checks that the fitted mapping numbers the four distinct timestamps
    1-4 in order of first appearance with ``pd.NaT`` mapped to -2, and
    that the transformed column carries the matching codes.
    """
    raw_stamps = [
        "1997-09-03 00:00:00",
        "1997-09-03 00:00:00",
        "2000-09-03 00:00:00",
        "1997-09-03 00:00:00",
        "1999-09-04 00:00:00",
        "2001-09-03 00:00:00",
    ]
    # Keep the {row label: value} dict-of-dict shape of the fixture.
    column = {row: pd.Timestamp(stamp) for row, stamp in enumerate(raw_stamps)}
    df = pd.DataFrame({"timestamps": column})

    enc = encoders.OrdinalEncoder(cols=["timestamps"])
    encoded_df = enc.fit_transform(df)

    distinct_stamps = [
        "1997-09-03 00:00:00",
        "2000-09-03 00:00:00",
        "1999-09-04 00:00:00",
        "2001-09-03 00:00:00",
    ]
    expected_index = [pd.Timestamp(stamp) for stamp in distinct_stamps] + [pd.NaT]
    expected_mapping = pd.Series([1, 2, 3, 4, -2], index=expected_index)
    expected_values = [1, 1, 2, 1, 3, 4]

    fitted_mapping = enc.mapping[0]["mapping"]
    pd.testing.assert_series_equal(expected_mapping, fitted_mapping)
    self.assertListEqual(expected_values, encoded_df["timestamps"].tolist())

def test_NoGaps(self):
train = pd.DataFrame({"city": ["New York", np.nan, "Rio", None, "Rosenheim"]})
expected_mapping_value = pd.Series([1, 2, 3, 4], index=["New York", "Rio", "Rosenheim", np.nan])
Expand Down

0 comments on commit d73479e

Please sign in to comment.