CLN: spelling fixes in docstrings (#34039)
* spelling fixes

* add whatsnew entry

* fix doctests

* and one more that wasn't caught
akosfurton authored May 8, 2020
1 parent 3ed7dff commit 6388370
Showing 44 changed files with 81 additions and 82 deletions.
4 changes: 2 additions & 2 deletions pandas/core/aggregation.py
@@ -89,7 +89,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i
]
uniquified_aggspec = _make_unique_kwarg_list(aggspec_order)

-# get the new indice of columns by comparison
+# get the new index of columns by comparison
col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
return aggspec, columns, col_idx_order

@@ -182,7 +182,7 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any:
is_dict = is_dict_like(agg_spec)
if not (is_dict or is_list_like(agg_spec)):
return agg_spec
-mangled_aggspec = type(agg_spec)() # dict or OrderdDict
+mangled_aggspec = type(agg_spec)() # dict or OrderedDict

if is_dict:
for key, aggfuncs in agg_spec.items():
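For context, `maybe_mangle_lambdas` is what lets `.agg` accept duplicate lambdas without emitting duplicate column labels. A minimal sketch of the user-facing behavior (example mine, not part of the diff):

```python
import pandas as pd

df = pd.DataFrame({"kind": ["cat", "cat", "dog"], "height": [9.1, 6.0, 34.0]})

# Two anonymous functions on one column would collide on the name "<lambda>";
# maybe_mangle_lambdas renames them "<lambda_0>", "<lambda_1>" behind the scenes.
out = df.groupby("kind")["height"].agg([lambda x: x.min(), lambda x: x.max()])
print(out.columns.tolist())  # ['<lambda_0>', '<lambda_1>']
```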
6 changes: 3 additions & 3 deletions pandas/core/algorithms.py
@@ -125,7 +125,7 @@ def _ensure_data(

except (TypeError, ValueError, OverflowError):
# if we are trying to coerce to a dtype
-# and it is incompat this will fall through to here
+# and it is incompatible this will fall through to here
return ensure_object(values), np.dtype("object")

# datetimelike
@@ -473,7 +473,7 @@ def _factorize_array(
values : ndarray
na_sentinel : int, default -1
size_hint : int, optional
-Passsed through to the hashtable's 'get_labels' method
+Passed through to the hashtable's 'get_labels' method
na_value : object, optional
A value in `values` to consider missing. Note: only use this
parameter when you know that you don't have any values pandas would
@@ -1239,7 +1239,7 @@ def get_indexer(current_indexer, other_indexer):
break

# Now find all values which are equal to
-# the (nsmallest: largest)/(nlarrgest: smallest)
+# the (nsmallest: largest)/(nlargest: smallest)
# from our series.
border_value = values == values[values.index[-1]]

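The `_factorize_array` docstring above is internal, but the `na_sentinel` behavior it describes is visible through the public `pd.factorize` (illustrative example):

```python
import pandas as pd

codes, uniques = pd.factorize(["b", "a", None, "b"])
print(codes)    # [ 0  1 -1  0] -- missing values get the na_sentinel code (-1)
print(uniques)  # ['b' 'a'] -- NA is not included in uniques
```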
2 changes: 1 addition & 1 deletion pandas/core/arrays/base.py
@@ -696,7 +696,7 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
na_value : object
The value in `values` to consider missing. This will be treated
as NA in the factorization routines, so it will be coded as
-`na_sentinal` and not included in `uniques`. By default,
+`na_sentinel` and not included in `uniques`. By default,
``np.nan`` is used.
Notes
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
@@ -2015,7 +2015,7 @@ def __setitem__(self, key, value):
# tuple of indexers (dataframe)
elif isinstance(key, tuple):
# only allow 1 dimensional slicing, but can
-# in a 2-d case be passd (slice(None),....)
+# in a 2-d case be passed (slice(None),....)
if len(key) == 2:
if not com.is_null_slice(key[0]):
raise AssertionError("invalid slicing for a 1-ndim categorical")
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
@@ -198,7 +198,7 @@ def _check_compatible_with(
----------
other
setitem : bool, default False
-For __setitem__ we may have stricter compatibility resrictions than
+For __setitem__ we may have stricter compatibility restrictions than
for comparisons.
Raises
2 changes: 1 addition & 1 deletion pandas/core/arrays/period.py
@@ -284,7 +284,7 @@ def __array__(self, dtype=None) -> np.ndarray:
elif dtype == bool:
return ~self._isnan

-# This will raise TypeErorr for non-object dtypes
+# This will raise TypeError for non-object dtypes
return np.array(list(self), dtype=object)

def __arrow_array__(self, type=None):
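`PeriodArray.__array__` above controls numpy conversion; by default it produces an object array of `Period` scalars (a small illustrative check):

```python
import numpy as np
import pandas as pd

arr = pd.period_range("2000-01", periods=3, freq="M").array
out = np.asarray(arr)
print(out.dtype)  # object -- each element is a Period scalar
print(out[0])     # Period('2000-01', 'M')
```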
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
@@ -784,7 +784,7 @@ def __getitem__(self, key):
# TODO: I think we can avoid densifying when masking a
# boolean SparseArray with another. Need to look at the
# key's fill_value for True / False, and then do an intersection
-# on the indicies of the sp_values.
+# on the indices of the sp_values.
if isinstance(key, SparseArray):
if is_bool_dtype(key):
key = key.to_dense()
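The TODO above concerns masking one `SparseArray` with another; today the boolean key is densified first. A rough sketch of the current behavior (assumptions mine):

```python
import pandas as pd

sp = pd.arrays.SparseArray([0, 0, 1, 2])
mask = pd.arrays.SparseArray([False, True, False, True])

# The boolean SparseArray key is converted to a dense ndarray before indexing;
# the TODO suggests intersecting the sp_values indices instead of densifying.
print(sp[mask])  # the values at positions 1 and 3, i.e. [0, 2], as a SparseArray
```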
6 changes: 3 additions & 3 deletions pandas/core/arrays/timedeltas.py
@@ -258,7 +258,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
if start is None and end is None:
if closed is not None:
raise ValueError(
"Closed has to be None if not both of startand end are defined"
"Closed has to be None if not both of start and end are defined"
)

left_closed, right_closed = dtl.validate_endpoints(closed)
@@ -877,7 +877,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
"""
Parameters
----------
-array : list-like
+data : list-like
copy : bool, default False
unit : str, default "ns"
The timedelta unit to treat integers as multiples of.
@@ -930,7 +930,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
copy = copy and not copy_made

elif is_float_dtype(data.dtype):
-# cast the unit, multiply base/frace separately
+# cast the unit, multiply base/frac separately
# to avoid precision issues from float -> int
mask = np.isnan(data)
m, p = precision_from_unit(unit)
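The corrected message above is raised by `pd.timedelta_range`, which only accepts `closed` when both endpoints are given (illustrative):

```python
import pandas as pd

# Both endpoints defined: 'closed' may exclude one of them.
print(pd.timedelta_range(start="1 day", end="3 days", freq="D", closed="right"))
# TimedeltaIndex(['2 days', '3 days'], dtype='timedelta64[ns]', freq='D')

# Only one endpoint defined: ambiguous, so the ValueError above is raised.
try:
    pd.timedelta_range(start="1 day", periods=3, closed="left")
except ValueError as err:
    print(err)  # Closed has to be None if not both of start and end are defined
```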
2 changes: 1 addition & 1 deletion pandas/core/config_init.py
@@ -604,7 +604,7 @@ def use_inf_as_na_cb(key):
: str
The plotting backend to use. The default value is "matplotlib", the
backend provided with pandas. Other backends can be specified by
-prodiving the name of the module that implements the backend.
+providing the name of the module that implements the backend.
"""


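The corrected option text refers to `plotting.backend`; switching backends looks like this (the third-party module name below is hypothetical):

```python
import pandas as pd

# "matplotlib" is the default backend shipped with pandas.
pd.set_option("plotting.backend", "matplotlib")

# A third-party backend is selected by providing its module name, e.g.:
# pd.set_option("plotting.backend", "some_backend_module")  # hypothetical
```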
2 changes: 1 addition & 1 deletion pandas/core/construction.py
@@ -543,7 +543,7 @@ def _try_cast(
return subarr

try:
-# GH#15832: Check if we are requesting a numeric dype and
+# GH#15832: Check if we are requesting a numeric dtype and
# that we can convert the data to the requested dtype.
if is_integer_dtype(dtype):
# this will raise if we have e.g. floats
5 changes: 2 additions & 3 deletions pandas/core/dtypes/cast.py
@@ -198,8 +198,7 @@ def maybe_downcast_numeric(result, dtype, do_round: bool = False):
return result

if isinstance(result, list):
-# reached via groupoby.agg _ohlc; really this should be handled
-# earlier
+# reached via groupby.agg._ohlc; really this should be handled earlier
result = np.array(result)

def trans(x):
@@ -1693,7 +1692,7 @@ def convert_scalar_for_putitemlike(scalar, dtype: np.dtype):
Parameters
----------
scalar : scalar
-dtype : np.dtpye
+dtype : np.dtype
Returns
-------
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
@@ -357,7 +357,7 @@ def _concatenate_2d(to_concat, axis: int):
def concat_datetime(to_concat, axis=0, typs=None):
"""
provide concatenation of an datetimelike array of arrays each of which is a
-single M8[ns], datetimet64[ns, tz] or m8[ns] dtype
+single M8[ns], datetime64[ns, tz] or m8[ns] dtype
Parameters
----------
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -644,7 +644,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool:
In case of non-interactive session, no boundaries apply.
-`ignore_width` is here so ipnb+HTML output can behave the way
+`ignore_width` is here so ipynb+HTML output can behave the way
users expect. display.max_columns remains in effect.
GH3541, GH3573
"""
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
@@ -1555,7 +1555,7 @@ def filter(self, func, dropna=True, *args, **kwargs):
Parameters
----------
-f : function
+func : function
Function to apply to each subframe. Should return True or False.
dropna : Drop groups that do not pass the filter. True by default;
If False, groups that evaluate False are filled with NaNs.
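The renamed `func` parameter belongs to the public `DataFrameGroupBy.filter`; a short usage sketch:

```python
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1, 2, 300, 400]})

# func receives each subframe and must return True or False.
kept = df.groupby("key").filter(lambda g: g["val"].mean() > 100)
print(kept)
#   key  val
# 2   b  300
# 3   b  400
```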
8 changes: 4 additions & 4 deletions pandas/core/groupby/ops.py
@@ -405,8 +405,8 @@ def _get_cython_func_and_vals(
Parameters
----------
-kind : sttr
-how : srt
+kind : str
+how : str
values : np.ndarray
is_numeric : bool
@@ -643,7 +643,7 @@ def agg_series(
return self._aggregate_series_pure_python(obj, func)

elif obj.index._has_complex_internals:
-# Pre-empt TypeError in _aggregate_series_fast
+# Preempt TypeError in _aggregate_series_fast
return self._aggregate_series_pure_python(obj, func)

try:
@@ -895,7 +895,7 @@ def agg_series(
assert len(self.bins) > 0 # otherwise we'd get IndexError in get_result

if is_extension_array_dtype(obj.dtype):
-# pre-empt SeriesBinGrouper from raising TypeError
+# preempt SeriesBinGrouper from raising TypeError
return self._aggregate_series_pure_python(obj, func)

dummy = obj[:0]
2 changes: 1 addition & 1 deletion pandas/core/indexers.py
@@ -441,7 +441,7 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
"""
from pandas.core.construction import array as pd_array

-# whathever is not an array-like is returned as-is (possible valid array
+# whatever is not an array-like is returned as-is (possible valid array
# indexers that are not array-like: integer, slice, Ellipsis, None)
# In this context, tuples are not considered as array-like, as they have
# a specific meaning in indexing (multi-dimensional indexing)
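`check_array_indexer` is also exposed as `pandas.api.indexers.check_array_indexer`; the pass-through behavior described in the comment can be seen directly (illustrative):

```python
import numpy as np
from pandas.api.indexers import check_array_indexer

arr = np.array([10, 20, 30])

# Non-array-likes (integers, slices, Ellipsis, None) are returned as-is.
print(check_array_indexer(arr, slice(1, 3)))  # slice(1, 3, None)

# Array-like boolean masks are validated (length-checked) and converted.
print(check_array_indexer(arr, [True, False, True]))  # [ True False  True]
```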
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
@@ -2784,7 +2784,7 @@ def get_loc_level(self, key, level=0, drop_level: bool = True):
def maybe_mi_droplevels(indexer, levels, drop_level: bool):
if not drop_level:
return self[indexer]
-# kludgearound
+# kludge around
orig_index = new_index = self[indexer]
levels = [self._get_level_number(i) for i in levels]
for i in sorted(levels, reverse=True):
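The `maybe_mi_droplevels` helper backs `MultiIndex.get_loc_level`; a quick look at the level-dropping it works around (example mine):

```python
import pandas as pd

mi = pd.MultiIndex.from_arrays([["a", "a", "b"], [1, 2, 1]], names=["x", "y"])

loc, remaining = mi.get_loc_level("a")
print(loc)        # slice(0, 2, None)
print(remaining)  # the matched 'x' level is dropped, leaving an index of [1, 2]
```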
2 changes: 1 addition & 1 deletion pandas/core/indexes/period.py
@@ -473,7 +473,7 @@ def get_loc(self, key, method=None, tolerance=None):
Parameters
----------
key : Period, NaT, str, or datetime
-String or datetime key must be parseable as Period.
+String or datetime key must be parsable as Period.
Returns
-------
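The corrected `get_loc` docstring is on `PeriodIndex`; string keys are parsed into `Period` objects before lookup (illustrative):

```python
import pandas as pd

pidx = pd.period_range("2020-01", periods=3, freq="M")
print(pidx.get_loc("2020-02"))                       # 1 -- string parsed as a Period
print(pidx.get_loc(pd.Period("2020-03", freq="M")))  # 2
```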
2 changes: 1 addition & 1 deletion pandas/core/indexes/range.py
@@ -741,7 +741,7 @@ def _make_evaluate_binop(op, step=False):
"""
Parameters
----------
-op : callable that accepts 2 parms
+op : callable that accepts 2 params
perform the binary op
step : callable, optional, default to False
op to apply to the step parm if not None
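`_make_evaluate_binop` builds the arithmetic ops that keep a `RangeIndex` lazy where possible; roughly (behavior sketch, assumptions mine):

```python
import pandas as pd

idx = pd.RangeIndex(start=0, stop=10, step=2)

# Addition shifts start/stop; multiplication also applies the op to the step,
# so the result can stay a memory-cheap RangeIndex instead of materializing.
print(idx + 3)  # RangeIndex(start=3, stop=13, step=2)
print(idx * 2)  # RangeIndex(start=0, stop=20, step=4)
```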
8 changes: 4 additions & 4 deletions pandas/core/indexing.py
@@ -90,7 +90,7 @@ class IndexingError(Exception):

class IndexingMixin:
"""
-Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series.
+Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.
"""

@property
@@ -1498,7 +1498,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
return key

def _get_setitem_indexer(self, key):
-# GH#32257 Fall through to let numnpy do validation
+# GH#32257 Fall through to let numpy do validation
return key

# -------------------------------------------------------------------
@@ -2257,9 +2257,9 @@ def need_slice(obj) -> bool:

def _non_reducing_slice(slice_):
"""
-Ensurse that a slice doesn't reduce to a Series or Scalar.
+Ensure that a slice doesn't reduce to a Series or Scalar.
-Any user-paseed `subset` should have this called on it
+Any user-passed `subset` should have this called on it
to make sure we're always working with DataFrames.
"""
# default to column slice, like DataFrame
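`IndexingMixin` provides the indexers named in the fixed docstring; for reference, the four accessors side by side (illustrative):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]}, index=["x", "y", "z"])

print(df.loc["y", "a"])  # 2 -- label-based
print(df.iloc[1, 0])     # 2 -- position-based
print(df.at["y", "a"])   # 2 -- fast scalar access by label
print(df.iat[1, 0])      # 2 -- fast scalar access by position
```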
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
@@ -825,7 +825,7 @@ def setitem(self, indexer, value):

return self.astype(dtype).setitem(indexer, value)

-# value must be storeable at this moment
+# value must be storable at this moment
if is_extension_array_dtype(getattr(value, "dtype", None)):
# We need to be careful not to allow through strings that
# can be parsed to EADtypes
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
@@ -600,7 +600,7 @@ def replace_list(
""" do a list replace """
inplace = validate_bool_kwarg(inplace, "inplace")

-# figure out our mask a-priori to avoid repeated replacements
+# figure out our mask apriori to avoid repeated replacements
values = self.as_array()

def comp(s, regex=False):
2 changes: 1 addition & 1 deletion pandas/core/nanops.py
@@ -143,7 +143,7 @@ def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool:

# GH 9422
# further we also want to preserve NaN when all elements
-# are NaN, unlinke bottleneck/numpy which consider this
+# are NaN, unlike bottleneck/numpy which consider this
# to be 0
if name in ["nansum", "nanprod"]:
return False
6 changes: 3 additions & 3 deletions pandas/core/ops/array_ops.py
@@ -131,7 +131,7 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal
"""
Return the result of evaluating op on the passed in values.
-If native types are not compatible, try coersion to object dtype.
+If native types are not compatible, try coercion to object dtype.
Parameters
----------
@@ -184,7 +184,7 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str):
Returns
-------
-ndarrray or ExtensionArray
+ndarray or ExtensionArray
Or a 2-tuple of these in the case of divmod or rdivmod.
"""

@@ -315,7 +315,7 @@ def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike:
Returns
-------
-ndarrray or ExtensionArray
+ndarray or ExtensionArray
"""
fill_int = lambda x: x

8 changes: 4 additions & 4 deletions pandas/core/reshape/merge.py
@@ -652,7 +652,7 @@ def __init__(
) = self._get_merge_keys()

# validate the merge keys dtypes. We may need to coerce
-# to avoid incompat dtypes
+# to avoid incompatible dtypes
self._maybe_coerce_merge_keys()

# If argument passed to validate,
@@ -1067,7 +1067,7 @@ def _get_merge_keys(self):
return left_keys, right_keys, join_names

def _maybe_coerce_merge_keys(self):
-# we have valid mergees but we may have to further
+# we have valid merges but we may have to further
# coerce these if they are originally incompatible types
#
# for example if these are categorical, but are not dtype_equal
@@ -1392,7 +1392,7 @@ def _restore_dropped_levels_multijoin(
"""

-def _convert_to_mulitindex(index) -> MultiIndex:
+def _convert_to_multiindex(index) -> MultiIndex:
if isinstance(index, MultiIndex):
return index
else:
@@ -1402,7 +1402,7 @@ def _convert_to_mulitindex(index) -> MultiIndex:
# the returned index if of type Index
# Assure that join_index is of type MultiIndex
# so that dropped levels can be appended
-join_index = _convert_to_mulitindex(join_index)
+join_index = _convert_to_multiindex(join_index)

join_levels = join_index.levels
join_codes = join_index.codes
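`_maybe_coerce_merge_keys` runs before the `validate` check mentioned above; from the user's side the two combine like this (illustrative):

```python
import pandas as pd

left = pd.DataFrame({"k": [1, 2], "a": ["x", "y"]})
right = pd.DataFrame({"k": [1.0, 2.0], "b": ["u", "v"]})  # float keys

# The int64/float64 merge keys are coerced to a common type before joining,
# and validate asserts the key relationship once the keys are built.
merged = left.merge(right, on="k", validate="one_to_one")
print(merged)
```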
2 changes: 1 addition & 1 deletion pandas/core/reshape/pivot.py
@@ -210,7 +210,7 @@ def _add_margins(
grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name)

if table.ndim == 2:
-# i.e. DataFramae
+# i.e. DataFrame
for level in table.columns.names[1:]:
if margins_name in table.columns.get_level_values(level):
raise ValueError(msg)
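`_add_margins` implements `pivot_table(margins=True)`; the ValueError it guards fires when `margins_name` collides with an existing column label (illustrative):

```python
import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": ["p", "q", "p"], "v": [1, 2, 3]})

table = pd.pivot_table(df, values="v", index="A", columns="B",
                       aggfunc="sum", margins=True, margins_name="All")
print(table)  # an extra 'All' row and column hold the grand margins
```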
2 changes: 1 addition & 1 deletion pandas/core/reshape/tile.py
@@ -440,7 +440,7 @@ def _bins_to_cuts(
categories=labels if len(set(labels)) == len(labels) else None,
ordered=ordered,
)
-# TODO: handle mismach between categorical label order and pandas.cut order.
+# TODO: handle mismatch between categorical label order and pandas.cut order.
np.putmask(ids, na_mask, 0)
result = algos.take_nd(labels, ids - 1)

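The TODO above is about `pandas.cut` label ordering; the current contract is that labels are matched to bins in bin order (illustrative):

```python
import pandas as pd

ages = [6, 25, 70]
out = pd.cut(ages, bins=[0, 18, 65, 99], labels=["child", "adult", "senior"])
print(list(out))  # ['child', 'adult', 'senior'] -- labels follow bin order
```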