Skip to content

Commit

Permalink
Added a check for wrong column names and a test for this check (#586)
Browse files: browse the repository at this point in the history
* Added a check for wrong column names and a test for this check

* Old python versions are so ugly...

* Make sure to run the check only after the kinds have been converted to strings
  • Loading branch information
nils-braun authored Nov 19, 2019
1 parent 7562b0d commit 11e5c07
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 0 deletions.
6 changes: 6 additions & 0 deletions tests/units/utilities/test_dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,12 @@ def test_with_wrong_input(self):
self.assertRaises(ValueError, dataframe_functions._normalize_input_to_internal_representation, test_df,
None, None, None, "value")

test_df = pd.DataFrame([{"id": 0, "a_": 3, "b": 5, "sort": 1}])
self.assertRaises(ValueError, dataframe_functions._normalize_input_to_internal_representation, test_df, "id", "sort", None, None)

test_df = pd.DataFrame([{"id": 0, "a__c": 3, "b": 5, "sort": 1}])
self.assertRaises(ValueError, dataframe_functions._normalize_input_to_internal_representation, test_df, "id", "sort", None, None)

test_df = pd.DataFrame([{"id": 0}])
self.assertRaises(ValueError, dataframe_functions._normalize_input_to_internal_representation, test_df,
"id", None, None, None)
Expand Down
8 changes: 8 additions & 0 deletions tsfresh/utilities/dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,14 @@ def _normalize_input_to_internal_representation(timeseries_container, column_id,
# The kind column should always be of type "str" so that the inference of feature settings later in `from_columns`
# works
timeseries_container[column_kind] = timeseries_container[column_kind].astype(str)

# Make sure we have only parsable names
for kind in timeseries_container[column_kind].unique():
if kind.endswith("_"):
raise ValueError("The kind {kind} is not allowed to end with '_'".format(kind=kind))
if "__" in kind:
raise ValueError("The kind {kind} is not allowed to contain '__'".format(kind=kind))

return timeseries_container, column_id, column_kind, column_value


Expand Down

0 comments on commit 11e5c07

Please sign in to comment.