diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py
index f6380808d5ac2..6130e05b2a4dc 100644
--- a/pandas/core/aggregation.py
+++ b/pandas/core/aggregation.py
@@ -89,7 +89,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i
     ]
     uniquified_aggspec = _make_unique_kwarg_list(aggspec_order)
 
-    # get the new indice of columns by comparison
+    # get the new index of columns by comparison
     col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
     return aggspec, columns, col_idx_order
 
@@ -182,7 +182,7 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any:
     is_dict = is_dict_like(agg_spec)
     if not (is_dict or is_list_like(agg_spec)):
         return agg_spec
-    mangled_aggspec = type(agg_spec)()  # dict or OrderdDict
+    mangled_aggspec = type(agg_spec)()  # dict or OrderedDict
 
     if is_dict:
         for key, aggfuncs in agg_spec.items():
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c2115094918e5..b97063cfa7fd0 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -125,7 +125,7 @@ def _ensure_data(
     except (TypeError, ValueError, OverflowError):
         # if we are trying to coerce to a dtype
-        # and it is incompat this will fall through to here
+        # and it is incompatible this will fall through to here
         return ensure_object(values), np.dtype("object")
 
     # datetimelike
@@ -473,7 +473,7 @@ def _factorize_array(
     values : ndarray
     na_sentinel : int, default -1
     size_hint : int, optional
-        Passsed through to the hashtable's 'get_labels' method
+        Passed through to the hashtable's 'get_labels' method
     na_value : object, optional
         A value in `values` to consider missing. Note: only use this
         parameter when you know that you don't have any values pandas would
@@ -1239,7 +1239,7 @@ def get_indexer(current_indexer, other_indexer):
             break
 
         # Now find all values which are equal to
-        # the (nsmallest: largest)/(nlarrgest: smallest)
+        # the (nsmallest: largest)/(nlargest: smallest)
         # from our series.
         border_value = values == values[values.index[-1]]
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index bd903d9b1fae3..92b84907e1c11 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -696,7 +696,7 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
         na_value : object
             The value in `values` to consider missing. This will be treated
             as NA in the factorization routines, so it will be coded as
-            `na_sentinal` and not included in `uniques`. By default,
+            `na_sentinel` and not included in `uniques`. By default,
             ``np.nan`` is used.
 
         Notes
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 59ae2dc171cf8..dc807f467f65e 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2015,7 +2015,7 @@ def __setitem__(self, key, value):
         # tuple of indexers (dataframe)
         elif isinstance(key, tuple):
             # only allow 1 dimensional slicing, but can
-            # in a 2-d case be passd (slice(None),....)
+            # in a 2-d case be passed (slice(None),....)
             if len(key) == 2:
                 if not com.is_null_slice(key[0]):
                     raise AssertionError("invalid slicing for a 1-ndim categorical")
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index a741c32a1035a..9ee150447eb5f 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -198,7 +198,7 @@ def _check_compatible_with(
         ----------
         other
         setitem : bool, default False
-            For __setitem__ we may have stricter compatibility resrictions than
+            For __setitem__ we may have stricter compatibility restrictions than
             for comparisons.
 
         Raises
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index d04fdee8961e4..8be714820d18b 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -284,7 +284,7 @@ def __array__(self, dtype=None) -> np.ndarray:
         elif dtype == bool:
             return ~self._isnan
 
-        # This will raise TypeErorr for non-object dtypes
+        # This will raise TypeError for non-object dtypes
         return np.array(list(self), dtype=object)
 
     def __arrow_array__(self, type=None):
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 0877e98e55311..9b6d4ad7323d0 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -784,7 +784,7 @@ def __getitem__(self, key):
             # TODO: I think we can avoid densifying when masking a
             # boolean SparseArray with another. Need to look at the
             # key's fill_value for True / False, and then do an intersection
-            # on the indicies of the sp_values.
+            # on the indices of the sp_values.
             if isinstance(key, SparseArray):
                 if is_bool_dtype(key):
                     key = key.to_dense()
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index a460d07e1f6f2..a62f94b1a3665 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -258,7 +258,7 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
         if start is None and end is None:
             if closed is not None:
                 raise ValueError(
-                    "Closed has to be None if not both of startand end are defined"
+                    "Closed has to be None if not both of start and end are defined"
                 )
 
         left_closed, right_closed = dtl.validate_endpoints(closed)
@@ -877,7 +877,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
     """
     Parameters
     ----------
-    array : list-like
+    data : list-like
     copy : bool, default False
     unit : str, default "ns"
         The timedelta unit to treat integers as multiples of.
@@ -930,7 +930,7 @@ def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
         copy = copy and not copy_made
 
     elif is_float_dtype(data.dtype):
-        # cast the unit, multiply base/frace separately
+        # cast the unit, multiply base/frac separately
         # to avoid precision issues from float -> int
         mask = np.isnan(data)
         m, p = precision_from_unit(unit)
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 4d5b9c6920e48..5089445c79897 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -604,7 +604,7 @@ def use_inf_as_na_cb(key):
 : str
     The plotting backend to use. The default value is "matplotlib", the
     backend provided with pandas. Other backends can be specified by
-    prodiving the name of the module that implements the backend.
+    providing the name of the module that implements the backend.
""" diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e6e26f0eec597..b110a316a76d9 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -543,7 +543,7 @@ def _try_cast( return subarr try: - # GH#15832: Check if we are requesting a numeric dype and + # GH#15832: Check if we are requesting a numeric dtype and # that we can convert the data to the requested dtype. if is_integer_dtype(dtype): # this will raise if we have e.g. floats diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b91cfde45f079..9865a7d28542d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -198,8 +198,7 @@ def maybe_downcast_numeric(result, dtype, do_round: bool = False): return result if isinstance(result, list): - # reached via groupoby.agg _ohlc; really this should be handled - # earlier + # reached via groupby.agg._ohlc; really this should be handled earlier result = np.array(result) def trans(x): @@ -1693,7 +1692,7 @@ def convert_scalar_for_putitemlike(scalar, dtype: np.dtype): Parameters ---------- scalar : scalar - dtype : np.dtpye + dtype : np.dtype Returns ------- diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 82b2795582ff1..e7e8d016e52b2 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -357,7 +357,7 @@ def _concatenate_2d(to_concat, axis: int): def concat_datetime(to_concat, axis=0, typs=None): """ provide concatenation of an datetimelike array of arrays each of which is a - single M8[ns], datetimet64[ns, tz] or m8[ns] dtype + single M8[ns], datetime64[ns, tz] or m8[ns] dtype Parameters ---------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4e86b3710a1bd..1203cd9fbd1b3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -644,7 +644,7 @@ def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: In case of non-interactive session, no boundaries apply. - `ignore_width` is here so ipnb+HTML output can behave the way + `ignore_width` is here so ipynb+HTML output can behave the way users expect. display.max_columns remains in effect. GH3541, GH3573 """ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index af5930fd22869..438babda9ff7a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1555,7 +1555,7 @@ def filter(self, func, dropna=True, *args, **kwargs): Parameters ---------- - f : function + func : function Function to apply to each subframe. Should return True or False. dropna : Drop groups that do not pass the filter. True by default; If False, groups that evaluate False are filled with NaNs. 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 71d7a07aadf7f..43e6b02e9dc53 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -405,8 +405,8 @@ def _get_cython_func_and_vals(
     Parameters
     ----------
-    kind : sttr
-    how : srt
+    kind : str
+    how : str
     values : np.ndarray
     is_numeric : bool
 
@@ -643,7 +643,7 @@ def agg_series(
             return self._aggregate_series_pure_python(obj, func)
 
         elif obj.index._has_complex_internals:
-            # Pre-empt TypeError in _aggregate_series_fast
+            # Preempt TypeError in _aggregate_series_fast
             return self._aggregate_series_pure_python(obj, func)
 
         try:
@@ -895,7 +895,7 @@ def agg_series(
         assert len(self.bins) > 0  # otherwise we'd get IndexError in get_result
 
         if is_extension_array_dtype(obj.dtype):
-            # pre-empt SeriesBinGrouper from raising TypeError
+            # preempt SeriesBinGrouper from raising TypeError
             return self._aggregate_series_pure_python(obj, func)
 
         dummy = obj[:0]
diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py
index 3d0e3699264a8..6dbcfef46fa98 100644
--- a/pandas/core/indexers.py
+++ b/pandas/core/indexers.py
@@ -441,7 +441,7 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
     """
     from pandas.core.construction import array as pd_array
 
-    # whathever is not an array-like is returned as-is (possible valid array
+    # whatever is not an array-like is returned as-is (possible valid array
     # indexers that are not array-like: integer, slice, Ellipsis, None)
     # In this context, tuples are not considered as array-like, as they have
     # a specific meaning in indexing (multi-dimensional indexing)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 72369a13b150f..f1e1ebcaca1c4 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2784,7 +2784,7 @@ def get_loc_level(self, key, level=0, drop_level: bool = True):
     def maybe_mi_droplevels(indexer, levels, drop_level: bool):
         if not drop_level:
             return self[indexer]
-        # kludgearound
+        # kludge around
         orig_index = new_index = self[indexer]
         levels = [self._get_level_number(i) for i in levels]
         for i in sorted(levels, reverse=True):
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 54892d5656990..b0b85f69396ba 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -473,7 +473,7 @@ def get_loc(self, key, method=None, tolerance=None):
         Parameters
         ----------
         key : Period, NaT, str, or datetime
-            String or datetime key must be parseable as Period.
+            String or datetime key must be parsable as Period.
 
         Returns
         -------
diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
index b463b8d738d30..c34b8965ca36a 100644
--- a/pandas/core/indexes/range.py
+++ b/pandas/core/indexes/range.py
@@ -741,7 +741,7 @@ def _make_evaluate_binop(op, step=False):
         """
         Parameters
         ----------
-        op : callable that accepts 2 parms
+        op : callable that accepts 2 params
             perform the binary op
         step : callable, optional, default to False
             op to apply to the step parm if not None
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index e51ec33ba8519..b857a59195695 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -90,7 +90,7 @@ class IndexingError(Exception):
 
 class IndexingMixin:
     """
-    Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series.
+    Mixin for adding .loc/.iloc/.at/.iat to DataFrames and Series.
""" @property @@ -1498,7 +1498,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): return key def _get_setitem_indexer(self, key): - # GH#32257 Fall through to let numnpy do validation + # GH#32257 Fall through to let numpy do validation return key # ------------------------------------------------------------------- @@ -2257,9 +2257,9 @@ def need_slice(obj) -> bool: def _non_reducing_slice(slice_): """ - Ensurse that a slice doesn't reduce to a Series or Scalar. + Ensure that a slice doesn't reduce to a Series or Scalar. - Any user-paseed `subset` should have this called on it + Any user-passed `subset` should have this called on it to make sure we're always working with DataFrames. """ # default to column slice, like DataFrame diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e4dcffae45f67..67512114ec5b1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -825,7 +825,7 @@ def setitem(self, indexer, value): return self.astype(dtype).setitem(indexer, value) - # value must be storeable at this moment + # value must be storable at this moment if is_extension_array_dtype(getattr(value, "dtype", None)): # We need to be careful not to allow through strings that # can be parsed to EADtypes diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5ab792563f136..3b88edabe9eb0 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -600,7 +600,7 @@ def replace_list( """ do a list replace """ inplace = validate_bool_kwarg(inplace, "inplace") - # figure out our mask a-priori to avoid repeated replacements + # figure out our mask apriori to avoid repeated replacements values = self.as_array() def comp(s, regex=False): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index bab9df0b70598..9f4541e0917b9 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -143,7 +143,7 @@ def _bn_ok_dtype(dtype: DtypeObj, name: str) -> bool: # GH 9422 # further we also want to preserve NaN when all elements - # are NaN, unlinke bottleneck/numpy which consider this + # are NaN, unlike bottleneck/numpy which consider this # to be 0 if name in ["nansum", "nanprod"]: return False diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index a1d853e38e757..59ac2a2071f0a 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -131,7 +131,7 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal """ Return the result of evaluating op on the passed in values. - If native types are not compatible, try coersion to object dtype. + If native types are not compatible, try coercion to object dtype. Parameters ---------- @@ -184,7 +184,7 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): Returns ------- - ndarrray or ExtensionArray + ndarray or ExtensionArray Or a 2-tuple of these in the case of divmod or rdivmod. """ @@ -315,7 +315,7 @@ def logical_op(left: ArrayLike, right: Any, op) -> ArrayLike: Returns ------- - ndarrray or ExtensionArray + ndarray or ExtensionArray """ fill_int = lambda x: x diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e3f4a80ecce7c..bc612f891d362 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -652,7 +652,7 @@ def __init__( ) = self._get_merge_keys() # validate the merge keys dtypes. 
-        # to avoid incompat dtypes
+        # to avoid incompatible dtypes
         self._maybe_coerce_merge_keys()
 
         # If argument passed to validate,
@@ -1067,7 +1067,7 @@ def _get_merge_keys(self):
         return left_keys, right_keys, join_names
 
     def _maybe_coerce_merge_keys(self):
-        # we have valid mergees but we may have to further
+        # we have valid merges but we may have to further
         # coerce these if they are originally incompatible types
         #
         # for example if these are categorical, but are not dtype_equal
@@ -1392,7 +1392,7 @@ def _restore_dropped_levels_multijoin(
     """
 
-    def _convert_to_mulitindex(index) -> MultiIndex:
+    def _convert_to_multiindex(index) -> MultiIndex:
         if isinstance(index, MultiIndex):
             return index
         else:
@@ -1402,7 +1402,7 @@ def _convert_to_mulitindex(index) -> MultiIndex:
     # the returned index if of type Index
     # Assure that join_index is of type MultiIndex
     # so that dropped levels can be appended
-    join_index = _convert_to_mulitindex(join_index)
+    join_index = _convert_to_multiindex(join_index)
 
     join_levels = join_index.levels
     join_codes = join_index.codes
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 17473ac26dfd6..c8d5eecf0e496 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -210,7 +210,7 @@ def _add_margins(
     grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name)
 
     if table.ndim == 2:
-        # i.e. DataFramae
+        # i.e. DataFrame
         for level in table.columns.names[1:]:
             if margins_name in table.columns.get_level_values(level):
                 raise ValueError(msg)
diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
index 345239eeb2372..6eae54633befb 100644
--- a/pandas/core/reshape/tile.py
+++ b/pandas/core/reshape/tile.py
@@ -440,7 +440,7 @@ def _bins_to_cuts(
             categories=labels if len(set(labels)) == len(labels) else None,
             ordered=ordered,
         )
-        # TODO: handle mismach between categorical label order and pandas.cut order.
+        # TODO: handle mismatch between categorical label order and pandas.cut order.
         np.putmask(ids, na_mask, 0)
         result = algos.take_nd(labels, ids - 1)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index eb409b432f89c..64220862c3e51 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -912,7 +912,7 @@ def __getitem__(self, key):
     def _get_with(self, key):
         # other: fancy integer or otherwise
         if isinstance(key, slice):
-            # _convert_slice_indexer to determin if this slice is positional
+            # _convert_slice_indexer to determine if this slice is positional
             # or label based, and if the latter, convert to positional
             slobj = self.index._convert_slice_indexer(key, kind="getitem")
             return self._slice(slobj)
@@ -3377,7 +3377,7 @@ def nlargest(self, n=5, keep="first") -> "Series":
         ...                          "Malta": 434000, "Maldives": 434000,
         ...                          "Brunei": 434000, "Iceland": 337000,
         ...                          "Nauru": 11300, "Tuvalu": 11300,
-        ...                          "Anguilla": 11300, "Monserat": 5200}
+        ...                          "Anguilla": 11300, "Montserrat": 5200}
         >>> s = pd.Series(countries_population)
         >>> s
         Italy       59000000
@@ -3389,7 +3389,7 @@ def nlargest(self, n=5, keep="first") -> "Series":
         Nauru          11300
         Tuvalu         11300
         Anguilla       11300
-        Monserat        5200
+        Montserrat      5200
         dtype: int64
 
         The `n` largest elements where ``n=5`` by default.
@@ -3475,7 +3475,7 @@ def nsmallest(self, n=5, keep="first") -> "Series":
         ...                          "Brunei": 434000, "Malta": 434000,
         ...                          "Maldives": 434000, "Iceland": 337000,
         ...                          "Nauru": 11300, "Tuvalu": 11300,
-        ...                          "Anguilla": 11300, "Monserat": 5200}
+        ...                          "Anguilla": 11300, "Montserrat": 5200}
"Anguilla": 11300, "Montserrat": 5200} >>> s = pd.Series(countries_population) >>> s Italy 59000000 @@ -3487,13 +3487,13 @@ def nsmallest(self, n=5, keep="first") -> "Series": Nauru 11300 Tuvalu 11300 Anguilla 11300 - Monserat 5200 + Montserrat 5200 dtype: int64 The `n` smallest elements where ``n=5`` by default. >>> s.nsmallest() - Monserat 5200 + Montserrat 5200 Nauru 11300 Tuvalu 11300 Anguilla 11300 @@ -3504,7 +3504,7 @@ def nsmallest(self, n=5, keep="first") -> "Series": 'first' so Nauru and Tuvalu will be kept. >>> s.nsmallest(3) - Monserat 5200 + Montserrat 5200 Nauru 11300 Tuvalu 11300 dtype: int64 @@ -3514,7 +3514,7 @@ def nsmallest(self, n=5, keep="first") -> "Series": with value 11300 based on the index order. >>> s.nsmallest(3, keep='last') - Monserat 5200 + Montserrat 5200 Anguilla 11300 Tuvalu 11300 dtype: int64 @@ -3523,7 +3523,7 @@ def nsmallest(self, n=5, keep="first") -> "Series": that the returned Series has four elements due to the three duplicates. >>> s.nsmallest(3, keep='all') - Monserat 5200 + Montserrat 5200 Nauru 11300 Tuvalu 11300 Anguilla 11300 diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 72d778524a364..1bb095d9bf72c 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -134,7 +134,7 @@ def _map_stringarray( func: Callable[[str], Any], arr: "StringArray", na_value: Any, dtype: Dtype ) -> ArrayLike: """ - Map a callable over valid elements of a StringArrray. + Map a callable over valid elements of a StringArray. Parameters ---------- @@ -2008,11 +2008,11 @@ def _noarg_wrapper( docstring=None, forbidden_types=["bytes"], returns_string=True, - **kargs, + **kwargs, ): @forbid_nonstring_types(forbidden_types, name=name) def wrapper(self): - result = _na_map(f, self._parent, **kargs) + result = _na_map(f, self._parent, **kwargs) return self._wrap_result(result, returns_string=returns_string) wrapper.__name__ = f.__name__ if name is None else name @@ -2321,7 +2321,7 @@ def _get_series_list(self, others): elif all(not is_list_like(x) for x in others): return [Series(others, index=idx)] raise TypeError( - "others must be Series, Index, DataFrame, np.ndarrary " + "others must be Series, Index, DataFrame, np.ndarray " "or list-like (either containing only strings or " "containing only objects of type Series/Index/" "np.ndarray[1-dim])" diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index f4bd14ad5c679..40bff5a75709b 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -494,7 +494,7 @@ def paste_wsl(): # Automatic detection of clipboard mechanisms -# and importing is done in deteremine_clipboard(): +# and importing is done in determine_clipboard(): def determine_clipboard(): """ Determine the OS/platform and set the copy() and paste() functions diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index d68a1fdde8da9..d2d5fdc7ab8a2 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -180,7 +180,7 @@ def _sizeof_fmt(num, size_qualifier): if verbose: _verbose_repr() - elif verbose is False: # specifically set to False, not nesc None + elif verbose is False: # specifically set to False, not necessarily None _non_verbose_repr() else: if exceeds_info_cols: diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index fecdf3b758f0f..f7ba4750bc2ad 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -124,7 +124,7 @@ class Styler: * Column label cells include * ``col_heading`` * ``col`` where `n` is the numeric 
-          * ``evel`` where `k` is the level in a MultiIndex
+          * ``level`` where `k` is the level in a MultiIndex
         * Blank cells include ``blank``
         * Data cells include ``data``
 
@@ -542,7 +542,7 @@ def render(self, **kwargs) -> str:
         d = self._translate()
         # filter out empty styles, every cell will have a class
         # but the list of props may just be [['', '']].
-        # so we have the neested anys below
+        # so we have the nested anys below
         trimmed = [x for x in d["cellstyle"] if any(any(y) for y in x["props"])]
         d["cellstyle"] = trimmed
         d.update(kwargs)
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index e833fdc20d542..44765dbe74b46 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -241,7 +241,7 @@ def _pull_field(
 
 def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> List:
     """
-    Interal function to pull field for records, and similar to
+    Internal function to pull field for records, and similar to
     _pull_field, but require to return list. And will raise error
     if has non iterable value.
     """
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 0a9daea105b64..cde7a98eb42ae 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -105,7 +105,7 @@ def write(
         table = self.api.Table.from_pandas(df, **from_pandas_kwargs)
 
         # write_to_dataset does not support a file-like object when
-        # a dircetory path is used, so just pass the path string.
+        # a directory path is used, so just pass the path string.
         if partition_cols is not None:
             self.api.parquet.write_to_dataset(
                 table,
@@ -190,7 +190,7 @@ def read(self, path, columns=None, **kwargs):
 
         # When path is s3:// an S3File is returned.
         # We need to retain the original path(str) while also
-        # pass the S3File().open function to fsatparquet impl.
+        # pass the S3File().open function to fastparquet impl.
         s3, filesystem = get_file_and_filesystem(path)
         try:
             parquet_file = self.api.ParquetFile(path, open_with=filesystem.open)
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 82380d456cd6d..9b3a29026448e 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -2626,7 +2626,7 @@ class GenericFixed(Fixed):
     _reverse_index_map = {v: k for k, v in _index_type_map.items()}
     attributes: List[str] = []
 
-    # indexer helpders
+    # indexer helpers
     def _class_to_alias(self, cls) -> str:
         return self._index_type_map.get(cls, "")
 
@@ -2819,7 +2819,7 @@ def read_index_node(
     ) -> Index:
         data = node[start:stop]
         # If the index was an empty array write_array_empty() will
-        # have written a sentinel. Here we relace it with the original.
+        # have written a sentinel. Here we replace it with the original.
         if "shape" in node._v_attrs and np.prod(node._v_attrs.shape) == 0:
             data = np.empty(node._v_attrs.shape, dtype=node._v_attrs.value_type,)
         kind = _ensure_decoded(node._v_attrs.kind)
@@ -3592,7 +3592,7 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes):
             )
 
         # evaluate the passed data_columns, True == use all columns
-        # take only valide axis labels
+        # take only valid axis labels
         if data_columns is True:
             data_columns = list(axis_labels)
         elif data_columns is None:
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index f445f05c2ee05..3ce1680c109f9 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -627,7 +627,7 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
 
     def generate_value_label(self, byteorder: str) -> bytes:
         """
-        Generate the binary representation of the value labals.
+        Generate the binary representation of the value labels.
 
         Parameters
         ----------
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 748b83aec88f4..467bdf7e0745d 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -1694,7 +1694,7 @@ def _find_backend(backend: str):
     try:
         return _backends[backend]
     except KeyError:
-        # Fall back to unregisted, module name approach.
+        # Fall back to unregistered, module name approach.
         try:
             module = importlib.import_module(backend)
         except ImportError:
diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py
index 46941e437a4ce..19a75eb151782 100644
--- a/pandas/plotting/_matplotlib/core.py
+++ b/pandas/plotting/_matplotlib/core.py
@@ -282,10 +282,10 @@ def _maybe_right_yaxis(self, ax, axes_num):
             return self._get_ax_layer(ax)
 
         if hasattr(ax, "right_ax"):
-            # if it has right_ax proparty, ``ax`` must be left axes
+            # if it has right_ax property, ``ax`` must be left axes
             return ax.right_ax
         elif hasattr(ax, "left_ax"):
-            # if it has left_ax proparty, ``ax`` must be right axes
+            # if it has left_ax property, ``ax`` must be right axes
             return ax
         else:
             # otherwise, create twin axes
@@ -387,7 +387,7 @@ def _compute_plot_data(self):
         if self.include_bool is True:
             include_type.append(np.bool_)
 
-        # GH22799, exclude datatime-like type for boxplot
+        # GH22799, exclude datetime-like type for boxplot
         exclude_type = None
         if self._kind == "box":
             # TODO: change after solving issue 27881
@@ -1103,7 +1103,7 @@ def _make_plot(self):
 
     @classmethod
     def _plot(cls, ax, x, y, style=None, column_num=None, stacking_id=None, **kwds):
-        # column_num is used to get the target column from protf in line and
+        # column_num is used to get the target column from plotf in line and
         # area plots
         if column_num == 0:
             cls._initialize_stacker(ax, stacking_id, len(y))
diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py
index 7319e8de3ec6e..0cafcfed38a54 100644
--- a/pandas/plotting/_matplotlib/misc.py
+++ b/pandas/plotting/_matplotlib/misc.py
@@ -385,7 +385,7 @@ def parallel_coordinates(
 
 def lag_plot(series, lag=1, ax=None, **kwds):
-    # workaround because `c='b'` is hardcoded in matplotlibs scatter method
+    # workaround because `c='b'` is hardcoded in matplotlib's scatter method
     import matplotlib.pyplot as plt
 
     kwds.setdefault("c", plt.rcParams["patch.facecolor"])
diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py
index 08d945f679810..ef8376bfef8a9 100644
--- a/pandas/plotting/_matplotlib/tools.py
+++ b/pandas/plotting/_matplotlib/tools.py
@@ -277,7 +277,7 @@ def _remove_labels_from_axis(axis):
         t.set_visible(False)
 
     # set_visible will not be effective if
-    # minor axis has NullLocator and NullFormattor (default)
+    # minor axis has NullLocator and NullFormatter (default)
     if isinstance(axis.get_minor_locator(), ticker.NullLocator):
         axis.set_minor_locator(ticker.AutoLocator())
     if isinstance(axis.get_minor_formatter(), ticker.NullFormatter):
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index e1de9d1bcf832..aabc10609b78b 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -269,7 +269,7 @@ def apply(self, other):
     def apply_index(self, i):
         """
         Vectorized apply of DateOffset to DatetimeIndex,
-        raises NotImplentedError for offsets without a
+        raises NotImplementedError for offsets without a
         vectorized implementation.
 
         Parameters
@@ -667,7 +667,7 @@ def _get_business_hours_by_sec(self, start, end):
         """
         Return business hours in a day by seconds.
         """
-        # create dummy datetime to calculate businesshours in a day
+        # create dummy datetime to calculate business hours in a day
         dtstart = datetime(2014, 4, 1, start.hour, start.minute)
         day = 1 if start < end else 2
         until = datetime(2014, 4, day, end.hour, end.minute)
@@ -2216,7 +2216,7 @@ def _rollback_to_year(self, other):
             # roll adjustment
             qtr_lens = self.get_weeks(norm)
 
-            # check thet qtr_lens is consistent with self._offset addition
+            # check that qtr_lens is consistent with self._offset addition
             end = liboffsets.shift_day(start, days=7 * sum(qtr_lens))
             assert self._offset.is_on_offset(end), (start, end, qtr_lens)
diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py
index b7bdbde5bac5e..26b5df862a07e 100644
--- a/pandas/util/_decorators.py
+++ b/pandas/util/_decorators.py
@@ -345,7 +345,7 @@ def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]:
     *args : str or callable
         The string / docstring / docstring template to be appended in order
         after default docstring under function.
-    **kwags : str
+    **kwargs : str
         The string which would be used to format docstring template.
     """
diff --git a/pandas/util/_doctools.py b/pandas/util/_doctools.py
index 71965b8e7dd9d..f413490764124 100644
--- a/pandas/util/_doctools.py
+++ b/pandas/util/_doctools.py
@@ -23,7 +23,7 @@ def __init__(
 
     def _shape(self, df: pd.DataFrame) -> Tuple[int, int]:
         """
-        Calculate table chape considering index levels.
+        Calculate table shape considering index levels.
         """
         row, col = df.shape
         return row + df.columns.nlevels, col + df.index.nlevels
diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py
index 53a25eb321b73..fbb44408f01be 100644
--- a/pandas/util/_validators.py
+++ b/pandas/util/_validators.py
@@ -353,7 +353,7 @@ def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
     """
     Validate percentiles (used by describe and quantile).
 
-    This function checks if the given float oriterable of floats is a valid percentile
+    This function checks if the given float or iterable of floats is a valid percentile
     otherwise raises a ValueError.
 
     Parameters