diff --git a/AUTHORS.md b/AUTHORS.md
index b59812dad..e689503fa 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -112,3 +112,4 @@ Contributors
 - [@asmirnov69](https://github.com/asmirnov69) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%asmirnov69)
 - [@xujiboy](https://github.com/xujiboy) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%xujiboy)
 - [@joranbeasley](https://github.com/joranbeasley) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/issues?q=is%3Aclosed+mentions%joranbeasley)
+- [@kianmeng](https://github.com/kianmeng) | [contributions](https://github.com/pyjanitor-devs/pyjanitor/pull/1290#issue-1906020324)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b1bff4a3f..6ec1b6275 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,7 @@
 # Changelog
 
 ## [Unreleased]
-
+- [ENH] `select` function now supports variable arguments - PR #1288 @samukweku
 
 ## [v0.26.0] - 2023-09-18
 - [ENH] `clean_names` can now be applied to column values. Issue #995 @samukweku
diff --git a/janitor/functions/currency_column_to_numeric.py b/janitor/functions/currency_column_to_numeric.py
index ac6644dfa..1194caa06 100644
--- a/janitor/functions/currency_column_to_numeric.py
+++ b/janitor/functions/currency_column_to_numeric.py
@@ -92,10 +92,16 @@ def currency_column_to_numeric(
 
     column_series = df[column_name]
     if cleaning_style == "accounting":
-        df.loc[:, column_name] = df[column_name].apply(
-            _clean_accounting_column
+        outcome = (
+            df[column_name]
+            .str.strip()
+            .str.replace(",", "", regex=False)
+            .str.replace(")", "", regex=False)
+            .str.replace("(", "-", regex=False)
+            .replace({"-": 0.0})
+            .astype(float)
         )
-        return df
+        return df.assign(**{column_name: outcome})
     if cleaning_style is not None:
         raise ValueError(
             "`cleaning_style` is expected to be one of ('accounting', None). "
@@ -130,29 +136,6 @@ def currency_column_to_numeric(
     return df
 
 
-def _clean_accounting_column(x: str) -> float:
-    """Perform the logic for the "accounting" cleaning style.
-
-    This is a private function, not intended to be used outside of
-    `currency_column_to_numeric``.
-
-    It is intended to be used in a pandas `apply` method.
-
-    Args:
-        x: A string representing currency.
-
-    Returns:
-        A float representing currency.
-    """
-    y = x.strip()
-    y = y.replace(",", "")
-    y = y.replace(")", "")
-    y = y.replace("(", "-")
-    if y == "-":
-        return 0.00
-    return float(y)
-
-
 def _currency_column_to_numeric(
     x: str,
     cast_non_numeric: Optional[dict] = None,
diff --git a/janitor/functions/filter.py b/janitor/functions/filter.py
index 54f720a4c..9a87e257b 100644
--- a/janitor/functions/filter.py
+++ b/janitor/functions/filter.py
@@ -260,31 +260,28 @@ def _date_filter_conditions(conditions):
         """Taken from: https://stackoverflow.com/a/13616382."""
         return reduce(np.logical_and, conditions)
 
-    if column_date_options:
-        df.loc[:, column_name] = pd.to_datetime(
-            df.loc[:, column_name], **column_date_options
-        )
-    else:
-        df.loc[:, column_name] = pd.to_datetime(df.loc[:, column_name])
+    if column_date_options is None:
+        column_date_options = {}
+    df[column_name] = pd.to_datetime(df[column_name], **column_date_options)
 
     _filter_list = []
 
     if start_date:
         start_date = pd.to_datetime(start_date, format=format)
-        _filter_list.append(df.loc[:, column_name] >= start_date)
+        _filter_list.append(df[column_name] >= start_date)
 
     if end_date:
         end_date = pd.to_datetime(end_date, format=format)
-        _filter_list.append(df.loc[:, column_name] <= end_date)
+        _filter_list.append(df[column_name] <= end_date)
 
     if years:
-        _filter_list.append(df.loc[:, column_name].dt.year.isin(years))
+        _filter_list.append(df[column_name].dt.year.isin(years))
 
     if months:
-        _filter_list.append(df.loc[:, column_name].dt.month.isin(months))
+        _filter_list.append(df[column_name].dt.month.isin(months))
 
     if days:
-        _filter_list.append(df.loc[:, column_name].dt.day.isin(days))
+        _filter_list.append(df[column_name].dt.day.isin(days))
 
     if start_date and end_date and start_date > end_date:
         warnings.warn(
diff --git a/janitor/functions/select.py b/janitor/functions/select.py
index 3b6a3b71c..2865d0d6a 100644
--- a/janitor/functions/select.py
+++ b/janitor/functions/select.py
@@ -1,12 +1,18 @@
 from typing import Any
 import pandas_flavor as pf
 import pandas as pd
-from janitor.utils import deprecated_alias
+from janitor.utils import refactored_function
+from janitor.utils import check, deprecated_alias
 from janitor.functions.utils import _select, DropLabel  # noqa: F401
 
 
 @pf.register_dataframe_method
-@deprecated_alias(search_cols="search_column_names")
+@refactored_function(
+    message=(
+        "This function will be deprecated in a 1.x release. "
+        "Please use `jn.select` instead."
+    )
+)
 def select_columns(
     df: pd.DataFrame,
     *args: Any,
@@ -30,6 +36,11 @@
     is with `.loc` or `.iloc` methods.
     `select_columns` is primarily for convenience.
 
+    !!!note
+
+        This function will be deprecated in a 1.x release.
+        Please use `jn.select` instead.
+
     Examples:
         >>> import pandas as pd
         >>> import janitor
@@ -209,19 +220,26 @@ class mammal
             a callable,
             or variable arguments of all the aforementioned.
             A sequence of booleans is also acceptable.
-            A dictionary can be used for selection on a MultiIndex on different levels.
+            A dictionary can be used for selection
+            on a MultiIndex on different levels.
         invert: Whether or not to invert the selection.
-            This will result in the selection of the complement of the columns
-            provided.
+            This will result in the selection
+            of the complement of the columns provided.
 
     Returns:
         A pandas DataFrame with the specified columns selected.
     """  # noqa: E501
 
-    return _select(df, args=args, invert=invert, axis="columns")
+    return _select(df, columns=list(args), invert=invert)
 
 
 @pf.register_dataframe_method
+@refactored_function(
+    message=(
+        "This function will be deprecated in a 1.x release. "
+        "Please use `jn.select` instead."
+    )
+)
 def select_rows(
     df: pd.DataFrame,
     *args: Any,
@@ -242,13 +260,17 @@
 
     !!! info "New in version 0.24.0"
 
 
-    !!!note
     The preferred option when selecting columns or rows in a Pandas DataFrame
     is with `.loc` or `.iloc` methods, as they are generally performant.
     `select_rows` is primarily for convenience.
 
+    !!!note
+
+        This function will be deprecated in a 1.x release.
+        Please use `jn.select` instead.
+
     Examples:
         >>> import pandas as pd
        >>> import janitor
@@ -275,20 +297,27 @@
             a callable,
             or variable arguments of all the aforementioned.
             A sequence of booleans is also acceptable.
-            A dictionary can be used for selection on a MultiIndex on different levels.
+            A dictionary can be used for selection
+            on a MultiIndex on different levels.
         invert: Whether or not to invert the selection.
-            This will result in the selection of the complement of the rows
-            provided.
+            This will result in the selection
+            of the complement of the rows provided.
 
     Returns:
         A pandas DataFrame with the specified rows selected.
     """  # noqa: E501
-    return _select(df, args=args, invert=invert, axis="index")
+    return _select(df, rows=list(args), invert=invert)
 
 
 @pf.register_dataframe_method
+@deprecated_alias(rows="index")
 def select(
-    df: pd.DataFrame, *, rows: Any = None, columns: Any = None
+    df: pd.DataFrame,
+    *args,
+    index: Any = None,
+    columns: Any = None,
+    axis: str = "columns",
+    invert: bool = False,
 ) -> pd.DataFrame:
     """Method-chainable selection of rows and columns.
 
@@ -302,6 +331,8 @@
 
     Selection can be inverted with the `DropLabel` class.
 
+    Optional ability to invert selection of index/columns available as well.
+
 
     !!! info "New in version 0.24.0"
 
@@ -312,6 +343,12 @@
     is with `.loc` or `.iloc` methods, as they are generally performant.
     `select` is primarily for convenience.
 
+    !!! abstract "Version Changed"
+
+    - 0.26.0
+        - Added variable `args`, `invert` and `axis` parameters.
+        - `rows` keyword deprecated in favour of `index`.
+
     Examples:
         >>> import pandas as pd
         >>> import janitor
@@ -323,13 +360,13 @@
         cobra               1       2
         viper               4       5
         sidewinder          7       8
-        >>> df.select(rows='cobra', columns='shield')
+        >>> df.select(index='cobra', columns='shield')
                shield
         cobra       2
 
         Labels can be dropped with the `DropLabel` class:
 
-        >>> df.select(rows=DropLabel('cobra'))
+        >>> df.select(index=DropLabel('cobra'))
                     max_speed  shield
         viper               4       5
         sidewinder          7       8
@@ -339,23 +376,54 @@
 
     Args:
         df: A pandas DataFrame.
-        rows: Valid inputs include: an exact label to look for,
+        *args: Valid inputs include: an exact index name to look for,
+            a shell-style glob string (e.g. `*_thing_*`),
+            a regular expression,
+            a callable,
+            or variable arguments of all the aforementioned.
+            A sequence of booleans is also acceptable.
+            A dictionary can be used for selection
+            on a MultiIndex on different levels.
+        index: Valid inputs include: an exact label to look for,
             a shell-style glob string (e.g. `*_thing_*`),
             a regular expression,
             a callable,
             or variable arguments of all the aforementioned.
             A sequence of booleans is also acceptable.
-            A dictionary can be used for selection on a MultiIndex on different levels.
+            A dictionary can be used for selection
+            on a MultiIndex on different levels.
         columns: Valid inputs include: an exact label to look for,
             a shell-style glob string (e.g. `*_thing_*`),
             a regular expression,
             a callable,
             or variable arguments of all the aforementioned.
             A sequence of booleans is also acceptable.
-            A dictionary can be used for selection on a MultiIndex on different levels.
+            A dictionary can be used for selection
+            on a MultiIndex on different levels.
+        invert: Whether or not to invert the selection.
+            This will result in the selection
+            of the complement of the rows/columns provided.
+        axis: Whether the selection should be on the index('index'),
+            or columns('columns').
+            Applicable only for the variable args parameter.
+
+    Raises:
+        ValueError: If args and index/columns are provided.
 
     Returns:
         A pandas DataFrame with the specified rows and/or columns selected.
     """  # noqa: E501
 
-    return _select(df, args=None, rows=rows, columns=columns, axis="both")
+    if args:
+        check("invert", invert, [bool])
+        if (index is not None) or (columns is not None):
+            raise ValueError(
+                "Either provide variable args with the axis parameter, "
+                "or provide arguments to the index and/or columns parameters."
+            )
+        if axis == "index":
+            return _select(df, rows=list(args), columns=columns, invert=invert)
+        if axis == "columns":
+            return _select(df, columns=list(args), rows=index, invert=invert)
+        raise ValueError("axis should be either 'index' or 'columns'.")
+    return _select(df, rows=index, columns=columns, invert=invert)
diff --git a/janitor/functions/utils.py b/janitor/functions/utils.py
index 3c66bd792..aa936a30c 100644
--- a/janitor/functions/utils.py
+++ b/janitor/functions/utils.py
@@ -642,9 +642,7 @@ def get_columns(group: Union[DataFrameGroupBy, SeriesGroupBy], label):
 
 def _select(
     df: pd.DataFrame,
-    args: tuple,
     invert: bool = False,
-    axis: str = "index",
     rows=None,
     columns=None,
 ) -> pd.DataFrame:
@@ -653,23 +651,25 @@
     """General function for selection on rows/columns.
 
     Returns a DataFrame.
     """
-    assert axis in {"both", "index", "columns"}
-    if axis == "both":
-        if rows is None:
-            rows = slice(None)
+    if rows is None:
+        row_indexer = slice(None)
+    else:
+        outcome = _select_index([rows], df, axis="index")
+        if invert:
+            row_indexer = np.ones(df.index.size, dtype=np.bool_)
+            row_indexer[outcome] = False
         else:
-            rows = _select_index([rows], df, axis="index")
-        if columns is None:
-            columns = slice(None)
+            row_indexer = outcome
+    if columns is None:
+        column_indexer = slice(None)
+    else:
+        outcome = _select_index([columns], df, axis="columns")
+        if invert:
+            column_indexer = np.ones(df.columns.size, dtype=np.bool_)
+            column_indexer[outcome] = False
         else:
-            columns = _select_index([columns], df, axis="columns")
-        return df.iloc[rows, columns]
-    indices = _select_index(list(args), df, axis)
-    if invert:
-        rev = np.ones(getattr(df, axis).size, dtype=np.bool_)
-        rev[indices] = False
-        return df.iloc(axis=axis)[rev]
-    return df.iloc(axis=axis)[indices]
+            column_indexer = outcome
+    return df.iloc[row_indexer, column_indexer]
 
 
 class _JoinOperator(Enum):
diff --git a/tests/functions/test_filter_date.py b/tests/functions/test_filter_date.py
index a66ec1402..de9b00e82 100644
--- a/tests/functions/test_filter_date.py
+++ b/tests/functions/test_filter_date.py
@@ -16,19 +16,13 @@ def test_filter_date_column_name(date_dataframe):
 def test_filter_date_year(date_dataframe):
     df = date_dataframe.filter_date(column_name="DATE", years=[2020])
 
-    def _get_year(x):
-        return x.year
-
-    assert df.DATE.apply(_get_year).unique()[0] == 2020
+    assert df.DATE.dt.year.unique()[0] == 2020
 
 
 def test_filter_date_years(date_dataframe):
     df = date_dataframe.filter_date(column_name="DATE", years=[2020, 2021])
 
-    def _get_year(x):
-        return x.year
-
-    test_result = df.DATE.apply(_get_year).unique()
+    test_result = df.DATE.dt.year.unique()
     expected_result = np.array([2020, 2021])
 
     assert np.array_equal(test_result, expected_result)
@@ -37,22 +31,28 @@ def _get_year(x):
 def test_filter_date_month(date_dataframe):
     df = date_dataframe.filter_date(column_name="DATE", months=range(10, 12))
 
-    def _get_month(x):
-        return x.month
-
-    test_result = df.DATE.apply(_get_month).unique()
+    test_result = df.DATE.dt.month.unique()
     expected_result = np.array([10, 11])
 
     assert np.array_equal(test_result, expected_result)
 
 
+def test_filter_date_days(date_dataframe):
+    df = date_dataframe.filter_date(column_name="DATE", days=range(1, 5))
+
+    test_result = df.DATE.dt.day.unique()
+    expected_result = np.arange(1, 5)
+
+    assert np.array_equal(test_result, expected_result)
+
+
 def test_filter_date_start(date_dataframe):
     start_date = "02/01/19"
 
     df = date_dataframe.filter_date(column_name="DATE", start_date=start_date)
 
     test_date = pd.to_datetime("01/31/19")
 
-    test_result = df[df.DATE <= test_date]
+    test_result = df[pd.to_datetime(df.DATE) <= test_date]
 
     assert test_result.empty
diff --git a/tests/functions/test_pivot_wider.py b/tests/functions/test_pivot_wider.py
index 51703da74..a8d363225 100644
--- a/tests/functions/test_pivot_wider.py
+++ b/tests/functions/test_pivot_wider.py
@@ -176,7 +176,7 @@ def test_non_unique_index_names_from_combination():
         {"A": ["A", "A", "A"], "L": ["L", "L", "L"], "numbers": [30, 54, 25]}
     )
     with pytest.raises(ValueError):
-        df.pivot_wider(index="A", names_from="L")
+        df.pivot_wider(index="A", names_from="L", values_from="numbers")
 
 
 def test_pivot_long_wide_long():
@@ -206,7 +206,10 @@ def test_pivot_long_wide_long():
     )
 
     result = df_in.pivot_wider(
-        index=["a", "b"], names_from="name", names_sep=None
+        index=["a", "b"],
+        names_from="name",
+        values_from=["points", "marks", "sets"],
+        names_sep=None,
     )
 
     result = result.pivot_longer(
@@ -271,6 +274,10 @@ def test_flatten_levels_false():
     )
 
 
+# some changes have been made to pd.pivot
+# which affects this test
+# ultimately pivot_wider will be deprecated in 1.x release
+# users are already advised to use pd.pivot instead
 def test_no_index():
     """Test output if no `index` is supplied."""
     df_in = pd.DataFrame(
@@ -278,21 +285,30 @@ def test_no_index():
             "gender": ["Male", "Female", "Female", "Male", "Male"],
             "contVar": [22379, 24523, 23421, 23831, 29234],
         },
-        index=pd.Int64Index([0, 0, 1, 1, 2], dtype="int64"),
+        index=[0, 0, 1, 1, 2],
     )
 
     expected_output = pd.DataFrame(
         {
-            "contVar_Female": [24523.0, 23421.0, np.nan],
-            "contVar_Male": [22379.0, 23831.0, 29234.0],
+            "Female": [24523.0, 23421.0, np.nan],
+            "Male": [22379.0, 23831.0, 29234.0],
         }
     )
 
-    result = df_in.pivot_wider(names_from="gender")
+    result = (
+        df_in.reset_index()
+        .pivot_wider(names_from="gender", values_from="contVar", index="index")
+        .set_index("index")
+        .rename_axis(index=None)
+    )
 
     assert_frame_equal(result, expected_output)
 
 
+# some changes have been made to pd.pivot
+# which affects this test
+# ultimately pivot_wider will be deprecated in 1.x release
+# users are already advised to use pd.pivot instead
 def test_no_index_names_from_order():
     """Test output if no `index` is supplied and column order is maintained."""
     df_in = pd.DataFrame(
@@ -300,18 +316,22 @@ def test_no_index_names_from_order():
             "gender": ["Male", "Female", "Female", "Male", "Male"],
             "contVar": [22379, 24523, 23421, 23831, 29234],
         },
-        index=pd.Int64Index([0, 0, 1, 1, 2], dtype="int64"),
+        index=[0, 0, 1, 1, 2],
    )
 
     expected_output = pd.DataFrame(
         {
-            "contVar_Male": [22379.0, 23831.0, 29234.0],
-            "contVar_Female": [24523.0, 23421.0, np.nan],
+            "Male": [22379.0, 23831.0, 29234.0],
+            "Female": [24523.0, 23421.0, np.nan],
         }
     )
 
-    result = df_in.encode_categorical(gender="appearance").pivot_wider(
-        names_from="gender"
+    result = (
+        df_in.encode_categorical(gender="appearance")
+        .reset_index()
+        .pivot_wider(names_from="gender", values_from="contVar", index="index")
+        .set_index("index")
+        .rename_axis(index=None)
     )
 
     assert_frame_equal(result, expected_output)
@@ -426,7 +446,9 @@
     """
     df_out = (
-        df_checks_output.pivot(["geoid", "name"], "variable", "estimate")
+        df_checks_output.pivot(
+            index=["geoid", "name"], columns="variable", values="estimate"
+        )
         .add_suffix("_estimate")
         .rename_axis(columns=None)
         .reset_index()
@@ -494,9 +516,9 @@ def df_expand():
 
 def test_names_expand(df_expand):
     """Test output if `names_expand`"""
-    actual = df_expand.pivot("year", "id", "percentage").reindex(
-        columns=pd.Categorical([1, 2, 3], ordered=True)
-    )
+    actual = df_expand.pivot(
+        index="year", columns="id", values="percentage"
+    ).reindex(columns=pd.Categorical([1, 2, 3], ordered=True))
     expected = df_expand.pivot_wider(
         "year", "id", "percentage", names_expand=True, flatten_levels=False
     )
@@ -506,7 +528,7 @@
 def test_names_expand_flatten_levels(df_expand):
     """Test output if `names_expand`"""
     actual = (
-        df_expand.pivot("year", "id", "percentage")
+        df_expand.pivot(index="year", columns="id", values="percentage")
         .reindex(columns=[1, 2, 3])
         .rename_axis(columns=None)
         .reset_index()
@@ -519,9 +541,9 @@
 
 def test_index_expand(df_expand):
     """Test output if `index_expand`"""
-    actual = df_expand.pivot("id", "year", "percentage").reindex(
-        pd.Categorical([1, 2, 3], ordered=True)
-    )
+    actual = df_expand.pivot(
+        index="id", columns="year", values="percentage"
+    ).reindex(pd.Categorical([1, 2, 3], ordered=True))
     expected = df_expand.pivot_wider(
         "id", "year", "percentage", index_expand=True, flatten_levels=False
     )
@@ -531,7 +553,7 @@
 def test_index_expand_flatten_levels(df_expand):
     """Test output if `index_expand`"""
     actual = (
-        df_expand.pivot("id", "year", "percentage")
+        df_expand.pivot(index="id", columns="year", values="percentage")
         .reindex(pd.Categorical([1, 2, 3], ordered=True))
         .rename_axis(columns=None)
         .reset_index()
@@ -552,7 +574,7 @@ def test_expand_multiple_levels(df_expand):
         flatten_levels=False,
     )
     actual = df_expand.complete("year", "gender", "id").pivot(
-        "id", ("year", "gender"), "percentage"
+        index="id", columns=("year", "gender"), values="percentage"
     )
 
     assert_frame_equal(actual, expected)
@@ -568,7 +590,7 @@ def test_expand_multiple_levels_flatten_levels(df_expand):
     )
     actual = (
         df_expand.complete("year", "gender", "id")
-        .pivot("id", ("year", "gender"), "percentage")
+        .pivot(index="id", columns=("year", "gender"), values="percentage")
         .collapse_levels()
         .reset_index()
     )
diff --git a/tests/functions/test_select.py b/tests/functions/test_select.py
index 3ac8908c6..0245b91ac 100644
--- a/tests/functions/test_select.py
+++ b/tests/functions/test_select.py
@@ -22,6 +22,47 @@ def dataframe():
     )
 
 
+def test_args_and_rows_and_columns(dataframe):
+    """
+    Raise if args and rows/columns are provided.
+    """
+    with pytest.raises(
+        ValueError,
+        match="Either provide variable args with the axis parameter,.+",
+    ):
+        dataframe.select("*", columns="*")
+
+
+def test_args_invert(dataframe):
+    """Raise if args and invert is not a boolean"""
+    with pytest.raises(TypeError, match="invert should be one of.+"):
+        dataframe.select("col1", invert=1, axis="columns")
+
+
+def test_args_axis(dataframe):
+    """Raise ValueError if args and axis is not index/columns"""
+    with pytest.raises(
+        ValueError, match="axis should be either 'index' or 'columns'."
+    ):
+        dataframe.select("col1", axis=1)
+
+
+def test_invert(dataframe):
+    "Test output if invert is provided."
+    actual = dataframe.select(
+        columns=["col1"], index=("bar", "one"), invert=True
+    )
+    expected = dataframe.loc[("bar", "two"):, ["col2"]]
+    assert_frame_equal(actual, expected)
+
+
+def test_invert_args(dataframe):
+    "Test output if invert is provided."
+    actual = dataframe.select(("bar", "one"), axis="index", invert=True)
+    expected = dataframe.loc[("bar", "two"):, :]
+    assert_frame_equal(actual, expected)
+
+
 def test_select_all_columns(dataframe):
     """Test output for select"""
     actual = dataframe.select(columns="*")
@@ -30,20 +71,27 @@
 
 def test_select_all_rows(dataframe):
     """Test output for select"""
-    actual = dataframe.select(rows="*")
+    actual = dataframe.select(index="*")
     assert_frame_equal(actual, dataframe)
 
 
 def test_select_rows_only(dataframe):
     """Test output for rows only"""
-    actual = dataframe.select(rows={"B": "two"})
+    actual = dataframe.select(index={"B": "two"})
+    expected = dataframe.loc(axis=0)[(slice(None), "two")]
+    assert_frame_equal(actual, expected)
+
+
+def test_select_rows_only_args(dataframe):
+    """Test output for rows only"""
+    actual = dataframe.select({"B": "two"}, axis="index")
     expected = dataframe.loc(axis=0)[(slice(None), "two")]
     assert_frame_equal(actual, expected)
 
 
 def test_select_rows_scalar_(dataframe):
     """Test output for rows only"""
-    actual = dataframe.select(rows="bar")
+    actual = dataframe.select(index="bar")
     expected = dataframe.xs("bar", axis=0, level=0, drop_level=False)
     assert_frame_equal(actual, expected)
 
@@ -55,6 +103,13 @@ def test_select_columns_only(dataframe):
     assert_frame_equal(actual, expected)
 
 
+def test_select_columns_only_args(dataframe):
+    """Test output for columns only"""
+    actual = dataframe.select("col1", "col2", axis="columns")
+    expected = dataframe.loc[:, :]
+    assert_frame_equal(actual, expected)
+
+
 def test_select_single_column(dataframe):
     """Test output for columns only"""
     actual = dataframe.select(columns="col1")
@@ -64,7 +119,7 @@
 
 def test_select_single_row(dataframe):
     """Test output for row only"""
-    actual = dataframe.select(rows=("bar", "one"))
+    actual = dataframe.select(index=("bar", "one"))
     expected = dataframe.loc[[("bar", "one")]]
     assert_frame_equal(actual, expected)
 
@@ -79,7 +134,7 @@ def test_select_columns_scalar(dataframe):
 def test_select_rows_and_columns(dataframe):
     """Test output for both rows and columns"""
     actual = dataframe.select(
-        rows=DropLabel(lambda df: df.eval('A == "foo"')),
+        index=DropLabel(lambda df: df.eval('A == "foo"')),
         columns=DropLabel(slice("col2", None)),
     )
     expected = dataframe.loc[["bar", "baz", "qux"], ["col1"]]
diff --git a/tests/timeseries/test_sort_timestamps_monotonically.py b/tests/timeseries/test_sort_timestamps_monotonically.py
index 54aa816d0..92624b6c0 100644
--- a/tests/timeseries/test_sort_timestamps_monotonically.py
+++ b/tests/timeseries/test_sort_timestamps_monotonically.py
@@ -47,8 +47,8 @@ def test_sort_timestamps_monotonically_decreasing(timeseries_dataframe):
 def test_sort_timestamps_monotonically_strict(timeseries_dataframe):
     """Test sort_timestamps_monotonically for index duplication handling"""
     df = timeseries_dataframe.shuffle(reset_index=False)
-    random_number = randint(1, len(timeseries_dataframe))
-    df = df.append(
-        df.loc[df.index[random_number], :]
+    random_number = df.index[randint(1, len(timeseries_dataframe))]
+    df = pd.concat(
+        [df, df.loc[[random_number], :]]
     ).sort_timestamps_monotonically(direction="increasing", strict=True)
     assert df.equals(timeseries_dataframe)
diff --git a/tests/utils/test_clean_accounting_column.py b/tests/utils/test_clean_accounting_column.py
deleted file mode 100644
index e48685fdd..000000000
--- a/tests/utils/test_clean_accounting_column.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import pytest
-
-from janitor.functions.currency_column_to_numeric import (
-    _clean_accounting_column,
-)
-
-
-@pytest.mark.utils
-def test_clean_accounting_column():
-    test_str = "(1,000)"
-    assert _clean_accounting_column(test_str) == float(-1000)
-
-
-@pytest.mark.utils
-def test_clean_accounting_column_zeroes():
-    test_str = "()"
-    assert _clean_accounting_column(test_str) == 0.00
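
Illustrative usage of the reworked `jn.select` API changed above. This is a sketch derived from the docstring and tests in this patch, using a made-up DataFrame; it is not part of the patch itself.

import pandas as pd
import janitor  # noqa: F401  (registers the .select method on DataFrames)

df = pd.DataFrame(
    {"max_speed": [1, 4, 7], "shield": [2, 5, 8]},
    index=["cobra", "viper", "sidewinder"],
)

# keyword selection: the old `rows=` argument is now spelled `index=`
df.select(index="cobra", columns="shield")

# variable arguments select labels on a single axis, chosen via `axis`
df.select("max_speed", "shield", axis="columns")

# invert=True keeps the complement of the selection
df.select("cobra", axis="index", invert=True)

# mixing variable args with index=/columns= raises a ValueError, per the new checks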
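The accounting-style branch of `currency_column_to_numeric` now uses a vectorised string pipeline instead of the removed `_clean_accounting_column` helper. A standalone sketch of that chain on a toy Series (the sample values are invented):

import pandas as pd

s = pd.Series(["(1,000)", " 2,500 ", "-"])

cleaned = (
    s.str.strip()
    .str.replace(",", "", regex=False)
    .str.replace(")", "", regex=False)
    .str.replace("(", "-", regex=False)  # accounting negatives: (x) -> -x
    .replace({"-": 0.0})                 # a bare "-" means zero
    .astype(float)
)
# cleaned -> [-1000.0, 2500.0, 0.0], matching the removed apply-based logic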
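The slimmed-down `_select` helper in `janitor/functions/utils.py` inverts a selection by flipping a boolean position mask before indexing with `.iloc`. A minimal, self-contained sketch of that masking idea (this is not the pyjanitor helper itself; the data and positions are hypothetical):

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})

selected = np.array([0, 2])                      # positions a label lookup might return
mask = np.ones(df.columns.size, dtype=np.bool_)  # start with "keep everything"
mask[selected] = False                           # invert: drop the selected positions
print(df.iloc[:, mask])                          # keeps only column "b"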
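Several of the test updates above track pandas 2.x API changes rather than pyjanitor itself: `DataFrame.append` and `pd.Int64Index` were removed, and `DataFrame.pivot` arguments became keyword-only. A brief sketch of the replacements with made-up data:

import pandas as pd

df = pd.DataFrame({"id": [1, 1, 2], "year": [2020, 2021, 2020], "value": [10, 20, 30]})

# df.append(...) is gone in pandas 2.x; concatenate instead
df2 = pd.concat([df, df.loc[[0], :]])

# positional pivot arguments are no longer accepted; name them explicitly
wide = df.pivot(index="id", columns="year", values="value")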