diff --git a/doc/source/api.rst b/doc/source/api.rst index abf0c235db895..fa898a95e6694 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -559,6 +559,8 @@ strings and apply several methods to it. These can be acccessed like Series.str.islower Series.str.isupper Series.str.istitle + Series.str.isnumeric + Series.str.isdecimal Series.str.get_dummies .. _api.categorical: diff --git a/doc/source/text.rst b/doc/source/text.rst index 2852d93a3731e..debf24f21c735 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -238,3 +238,5 @@ Method Summary :meth:`~Series.str.islower`,Equivalent to ``str.islower`` :meth:`~Series.str.isupper`,Equivalent to ``str.isupper`` :meth:`~Series.str.istitle`,Equivalent to ``str.istitle`` + :meth:`~Series.str.isnumeric`,Equivalent to ``str.isnumeric`` + :meth:`~Series.str.isdecimal`,Equivalent to ``str.isdecimal`` diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index e8b398aec4b74..5c7fb2f3fe13c 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -157,7 +157,7 @@ Enhancements ``isupper()``, ``istitle()`` which behave as the same as standard ``str`` (:issue:`9282`) - +- Added ``StringMethods.isnumeric`` and ``isdecimal`` which behave as the same as standard ``str`` (:issue:`9439`) - Added ``StringMethods.ljust()`` and ``rjust()`` which behave as the same as standard ``str`` (:issue:`9352`) - ``StringMethods.pad()`` and ``center()`` now accept ``fillchar`` option to specify filling character (:issue:`9352`) - Added ``StringMethods.zfill()`` which behave as the same as standard ``str`` (:issue:`9387`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c470ae65c4d61..605f3a42651fb 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1096,23 +1096,37 @@ def get_dummies(self, sep='|'): docstring=_shared_docs['casemethods'] % 'titlecase') _shared_docs['ismethods'] = (""" - Check whether all characters in each string in the array are %s 
+ Check whether all characters in each string in the array are %(type)s. + Equivalent to ``str.%(method)s``. Returns ------- Series of boolean values """) + _shared_docs['isalnum'] = dict(type='alphanumeric', method='isalnum') + _shared_docs['isalpha'] = dict(type='alphabetic', method='isalpha') + _shared_docs['isdigit'] = dict(type='digits', method='isdigit') + _shared_docs['isspace'] = dict(type='whitespace', method='isspace') + _shared_docs['islower'] = dict(type='lowercase', method='islower') + _shared_docs['isupper'] = dict(type='uppercase', method='isupper') + _shared_docs['istitle'] = dict(type='titlecase', method='istitle') + _shared_docs['isnumeric'] = dict(type='numeric', method='isnumeric') + _shared_docs['isdecimal'] = dict(type='decimal', method='isdecimal') isalnum = _noarg_wrapper(lambda x: x.isalnum(), - docstring=_shared_docs['ismethods'] % 'alphanumeric') + docstring=_shared_docs['ismethods'] % _shared_docs['isalnum']) isalpha = _noarg_wrapper(lambda x: x.isalpha(), - docstring=_shared_docs['ismethods'] % 'alphabetic') + docstring=_shared_docs['ismethods'] % _shared_docs['isalpha']) isdigit = _noarg_wrapper(lambda x: x.isdigit(), - docstring=_shared_docs['ismethods'] % 'digits') + docstring=_shared_docs['ismethods'] % _shared_docs['isdigit']) isspace = _noarg_wrapper(lambda x: x.isspace(), - docstring=_shared_docs['ismethods'] % 'whitespace') + docstring=_shared_docs['ismethods'] % _shared_docs['isspace']) islower = _noarg_wrapper(lambda x: x.islower(), - docstring=_shared_docs['ismethods'] % 'lowercase') + docstring=_shared_docs['ismethods'] % _shared_docs['islower']) isupper = _noarg_wrapper(lambda x: x.isupper(), - docstring=_shared_docs['ismethods'] % 'uppercase') + docstring=_shared_docs['ismethods'] % _shared_docs['isupper']) istitle = _noarg_wrapper(lambda x: x.istitle(), - docstring=_shared_docs['ismethods'] % 'titlecase') + docstring=_shared_docs['ismethods'] % _shared_docs['istitle']) + isnumeric = _noarg_wrapper(lambda x: 
compat.u_safe(x).isnumeric(), + docstring=_shared_docs['ismethods'] % _shared_docs['isnumeric']) + isdecimal = _noarg_wrapper(lambda x: compat.u_safe(x).isdecimal(), + docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index a729c4826ada3..0d9875fb9d4b1 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -14,7 +14,7 @@ from numpy.testing import assert_array_equal from numpy.random import randint -from pandas.compat import range, lrange, u +from pandas.compat import range, lrange, u, unichr import pandas.compat as compat from pandas import (Index, Series, TimeSeries, DataFrame, isnull, notnull, bdate_range, date_range, MultiIndex) @@ -630,6 +630,8 @@ def test_empty_str_methods(self): tm.assert_series_equal(empty_str, empty.str.islower()) tm.assert_series_equal(empty_str, empty.str.isupper()) tm.assert_series_equal(empty_str, empty.str.istitle()) + tm.assert_series_equal(empty_str, empty.str.isnumeric()) + tm.assert_series_equal(empty_str, empty.str.isdecimal()) def test_ismethods(self): values = ['A', 'b', 'Xy', '4', '3A', '', 'TT', '55', '-', ' '] @@ -659,6 +661,31 @@ def test_ismethods(self): self.assertEquals(str_s.str.isupper().tolist(), [v.isupper() for v in values]) self.assertEquals(str_s.str.istitle().tolist(), [v.istitle() for v in values]) + def test_isnumeric(self): + # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER + # 0x2605: ★ not number + # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY + # 0xFF13: ３ FULLWIDTH DIGIT THREE + values = ['A', '3', unichr(0x00bc), unichr(0x2605), + unichr(0x1378), unichr(0xFF13), 'four'] + s = Series(values) + numeric_e = [False, True, True, False, True, True, False] + decimal_e = [False, True, False, False, False, True, False] + tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e)) + tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e)) + unicodes = [u('A'), u('3'), unichr(0x00bc), unichr(0x2605), + unichr(0x1378), unichr(0xFF13), 
u('four')] + self.assertEquals(s.str.isnumeric().tolist(), [v.isnumeric() for v in unicodes]) + self.assertEquals(s.str.isdecimal().tolist(), [v.isdecimal() for v in unicodes]) + + values = ['A', np.nan, unichr(0x00bc), unichr(0x2605), + np.nan, unichr(0xFF13), 'four'] + s = Series(values) + numeric_e = [False, np.nan, True, False, np.nan, True, False] + decimal_e = [False, np.nan, False, False, np.nan, True, False] + tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e)) + tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e)) + def test_get_dummies(self): s = Series(['a|b', 'a|c', np.nan]) result = s.str.get_dummies('|')