diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0779f9c95f7b4..18f3644a0e0ae 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,6 +27,7 @@ repos: rev: v0.9.1 hooks: - id: cython-lint + - id: double-quote-cython-strings - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 7b9fe6422544c..fcd30ab1faec8 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -180,7 +180,7 @@ def is_lexsorted(list_of_arrays: list) -> bint: cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) for i in range(nlevels): arr = list_of_arrays[i] - assert arr.dtype.name == 'int64' + assert arr.dtype.name == "int64" vecs[i] = cnp.PyArray_DATA(arr) # Assume uniqueness?? @@ -514,9 +514,9 @@ def validate_limit(nobs: int | None, limit=None) -> int: lim = nobs else: if not util.is_integer_object(limit): - raise ValueError('Limit must be an integer') + raise ValueError("Limit must be an integer") if limit < 1: - raise ValueError('Limit must be greater than 0') + raise ValueError("Limit must be greater than 0") lim = limit return lim @@ -958,7 +958,7 @@ def rank_1d( if not ascending: tiebreak = TIEBREAK_FIRST_DESCENDING - keep_na = na_option == 'keep' + keep_na = na_option == "keep" N = len(values) if labels is not None: @@ -984,7 +984,7 @@ def rank_1d( # with mask, without obfuscating location of missing data # in values array if numeric_object_t is object and values.dtype != np.object_: - masked_vals = values.astype('O') + masked_vals = values.astype("O") else: masked_vals = values.copy() @@ -1005,7 +1005,7 @@ def rank_1d( # If descending, fill with highest value since descending # will flip the ordering to still end up with lowest rank. 
# Symmetric logic applies to `na_option == 'bottom'` - nans_rank_highest = ascending ^ (na_option == 'top') + nans_rank_highest = ascending ^ (na_option == "top") nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, 0) if nans_rank_highest: order = [masked_vals, mask] @@ -1345,7 +1345,7 @@ def rank_2d( if not ascending: tiebreak = TIEBREAK_FIRST_DESCENDING - keep_na = na_option == 'keep' + keep_na = na_option == "keep" # For cases where a mask is not possible, we can avoid mask checks check_mask = ( @@ -1362,9 +1362,9 @@ def rank_2d( if numeric_object_t is object: if values.dtype != np.object_: - values = values.astype('O') + values = values.astype("O") - nans_rank_highest = ascending ^ (na_option == 'top') + nans_rank_highest = ascending ^ (na_option == "top") if check_mask: nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, 0) @@ -1385,7 +1385,7 @@ def rank_2d( order = (values, ~np.asarray(mask)) n, k = (values).shape - out = np.empty((n, k), dtype='f8', order='F') + out = np.empty((n, k), dtype="f8", order="F") grp_sizes = np.ones(n, dtype=np.int64) # lexsort is slower, so only use if we need to worry about the mask diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index a351ad6e461f3..a5b9bf02dcbe2 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -604,12 +604,12 @@ def group_any_all( intp_t lab int8_t flag_val, val - if val_test == 'all': + if val_test == "all": # Because the 'all' value of an empty iterable in Python is True we can # start with an array full of ones and set to zero when a False value # is encountered flag_val = 0 - elif val_test == 'any': + elif val_test == "any": # Because the 'any' value of an empty iterable in Python is False we # can start with an array full of zeros and set to one only if any # value encountered is True @@ -1061,7 +1061,7 @@ def group_ohlc( N, K = (values).shape if out.shape[1] != 4: - raise ValueError('Output array must have 4 columns') + raise ValueError("Output array must have 4 columns") if K > 1: raise NotImplementedError("Argument 'values' must have only one dimension") @@ -1157,11 +1157,11 @@ def group_quantile( ) inter_methods = { - 'linear': INTERPOLATION_LINEAR, - 'lower': INTERPOLATION_LOWER, - 'higher': INTERPOLATION_HIGHER, - 'nearest': INTERPOLATION_NEAREST, - 'midpoint': INTERPOLATION_MIDPOINT, + "linear": INTERPOLATION_LINEAR, + "lower": INTERPOLATION_LOWER, + "higher": INTERPOLATION_HIGHER, + "nearest": INTERPOLATION_NEAREST, + "midpoint": INTERPOLATION_MIDPOINT, } interp = inter_methods[interpolation] diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 27edc83c6f329..eb4e957f644ac 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -184,8 +184,8 @@ cdef class IndexEngine: if self.is_monotonic_increasing: values = self.values try: - left = values.searchsorted(val, side='left') - right = values.searchsorted(val, side='right') + left = values.searchsorted(val, side="left") + right = values.searchsorted(val, side="right") except TypeError: # e.g. GH#29189 get_loc(None) with a Float64Index # 2021-09-29 Now only reached for object-dtype @@ -353,8 +353,8 @@ cdef class IndexEngine: remaining_stargets = set() for starget in stargets: try: - start = values.searchsorted(starget, side='left') - end = values.searchsorted(starget, side='right') + start = values.searchsorted(starget, side="left") + end = values.searchsorted(starget, side="right") except TypeError: # e.g. 
if we tried to search for string in int array remaining_stargets.add(starget) else: @@ -551,7 +551,7 @@ cdef class DatetimeEngine(Int64Engine): return self._get_loc_duplicates(conv) values = self.values - loc = values.searchsorted(conv, side='left') + loc = values.searchsorted(conv, side="left") if loc == len(values) or values[loc] != conv: raise KeyError(val) @@ -655,8 +655,8 @@ cdef class BaseMultiIndexCodesEngine: # with positive integers (-1 for NaN becomes 1). This enables us to # differentiate between values that are missing in other and matching # NaNs. We will set values that are not found to 0 later: - labels_arr = np.array(labels, dtype='int64').T + multiindex_nulls_shift - codes = labels_arr.astype('uint64', copy=False) + labels_arr = np.array(labels, dtype="int64").T + multiindex_nulls_shift + codes = labels_arr.astype("uint64", copy=False) self.level_has_nans = [-1 in lab for lab in labels] # Map each codes combination in the index to an integer unambiguously @@ -693,7 +693,7 @@ cdef class BaseMultiIndexCodesEngine: if self.level_has_nans[i] and codes.hasnans: result[codes.isna()] += 1 level_codes.append(result) - return self._codes_to_ints(np.array(level_codes, dtype='uint64').T) + return self._codes_to_ints(np.array(level_codes, dtype="uint64").T) def get_indexer(self, target: np.ndarray) -> np.ndarray: """ @@ -754,12 +754,12 @@ cdef class BaseMultiIndexCodesEngine: ndarray[int64_t, ndim=1] new_codes, new_target_codes ndarray[intp_t, ndim=1] sorted_indexer - target_order = np.argsort(target).astype('int64') + target_order = np.argsort(target).astype("int64") target_values = target[target_order] num_values, num_target_values = len(values), len(target_values) new_codes, new_target_codes = ( - np.empty((num_values,)).astype('int64'), - np.empty((num_target_values,)).astype('int64'), + np.empty((num_values,)).astype("int64"), + np.empty((num_target_values,)).astype("int64"), ) # `values` and `target_values` are both sorted, so we walk through them @@ -809,7 +809,7 @@ cdef class BaseMultiIndexCodesEngine: raise KeyError(key) # Transform indices into single integer: - lab_int = self._codes_to_ints(np.array(indices, dtype='uint64')) + lab_int = self._codes_to_ints(np.array(indices, dtype="uint64")) return self._base.get_loc(self, lab_int) @@ -940,8 +940,8 @@ cdef class SharedEngine: if self.is_monotonic_increasing: values = self.values try: - left = values.searchsorted(val, side='left') - right = values.searchsorted(val, side='right') + left = values.searchsorted(val, side="left") + right = values.searchsorted(val, side="right") except TypeError: # e.g. 
GH#29189 get_loc(None) with a Float64Index raise KeyError(val) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 43e33ef3e7d7e..ee51a4fd402fb 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -69,7 +69,7 @@ cdef class BlockPlacement: or not cnp.PyArray_ISWRITEABLE(val) or (val).descr.type_num != cnp.NPY_INTP ): - arr = np.require(val, dtype=np.intp, requirements='W') + arr = np.require(val, dtype=np.intp, requirements="W") else: arr = val # Caller is responsible for ensuring arr.ndim == 1 diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 7ed635718e674..5b2cb880195ec 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -42,7 +42,7 @@ from pandas._libs.tslibs.util cimport ( is_timedelta64_object, ) -VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) +VALID_CLOSED = frozenset(["left", "right", "both", "neither"]) cdef class IntervalMixin: @@ -59,7 +59,7 @@ cdef class IntervalMixin: bool True if the Interval is closed on the left-side. """ - return self.closed in ('left', 'both') + return self.closed in ("left", "both") @property def closed_right(self): @@ -73,7 +73,7 @@ cdef class IntervalMixin: bool True if the Interval is closed on the right-side. """ - return self.closed in ('right', 'both') + return self.closed in ("right", "both") @property def open_left(self): @@ -172,9 +172,9 @@ cdef class IntervalMixin: >>> pd.IntervalIndex(ivs).is_empty array([ True, False]) """ - return (self.right == self.left) & (self.closed != 'both') + return (self.right == self.left) & (self.closed != "both") - def _check_closed_matches(self, other, name='other'): + def _check_closed_matches(self, other, name="other"): """ Check if the closed attribute of `other` matches. @@ -197,9 +197,9 @@ cdef class IntervalMixin: cdef bint _interval_like(other): - return (hasattr(other, 'left') - and hasattr(other, 'right') - and hasattr(other, 'closed')) + return (hasattr(other, "left") + and hasattr(other, "right") + and hasattr(other, "closed")) cdef class Interval(IntervalMixin): @@ -311,7 +311,7 @@ cdef class Interval(IntervalMixin): Either ``left``, ``right``, ``both`` or ``neither``. 
""" - def __init__(self, left, right, str closed='right'): + def __init__(self, left, right, str closed="right"): # note: it is faster to just do these checks than to use a special # constructor (__cinit__/__new__) to avoid them @@ -343,8 +343,8 @@ cdef class Interval(IntervalMixin): def __contains__(self, key) -> bool: if _interval_like(key): - key_closed_left = key.closed in ('left', 'both') - key_closed_right = key.closed in ('right', 'both') + key_closed_left = key.closed in ("left", "both") + key_closed_right = key.closed in ("right", "both") if self.open_left and key_closed_left: left_contained = self.left < key.left else: @@ -389,15 +389,15 @@ cdef class Interval(IntervalMixin): left, right = self._repr_base() name = type(self).__name__ - repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})' + repr_str = f"{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})" return repr_str def __str__(self) -> str: left, right = self._repr_base() - start_symbol = '[' if self.closed_left else '(' - end_symbol = ']' if self.closed_right else ')' - return f'{start_symbol}{left}, {right}{end_symbol}' + start_symbol = "[" if self.closed_left else "(" + end_symbol = "]" if self.closed_right else ")" + return f"{start_symbol}{left}, {right}{end_symbol}" def __add__(self, y): if ( diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4890f82c5fdda..e35cf2fb13768 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -394,7 +394,7 @@ def dicts_to_array(dicts: list, columns: list): k = len(columns) n = len(dicts) - result = np.empty((n, k), dtype='O') + result = np.empty((n, k), dtype="O") for i in range(n): row = dicts[i] @@ -768,7 +768,7 @@ def is_all_arraylike(obj: list) -> bool: for i in range(n): val = obj[i] if not (isinstance(val, list) or - util.is_array(val) or hasattr(val, '_data')): + util.is_array(val) or hasattr(val, "_data")): # TODO: EA? # exclude tuples, frozensets as they may be contained in an Index all_arrays = False @@ -786,7 +786,7 @@ def is_all_arraylike(obj: list) -> bool: @cython.boundscheck(False) @cython.wraparound(False) def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, - object closed='left', bint hasnans=False): + object closed="left", bint hasnans=False): """ Int64 (datetime64) version of generic python version in ``groupby.py``. 
""" @@ -794,7 +794,7 @@ def generate_bins_dt64(ndarray[int64_t, ndim=1] values, const int64_t[:] binner, Py_ssize_t lenidx, lenbin, i, j, bc ndarray[int64_t, ndim=1] bins int64_t r_bin, nat_count - bint right_closed = closed == 'right' + bint right_closed = closed == "right" nat_count = 0 if hasnans: @@ -873,7 +873,7 @@ def get_level_sorter( for i in range(len(starts) - 1): l, r = starts[i], starts[i + 1] - out[l:r] = l + codes[l:r].argsort(kind='mergesort') + out[l:r] = l + codes[l:r].argsort(kind="mergesort") return out @@ -892,7 +892,7 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, n, k = (mask).shape if axis == 0: - counts = np.zeros((max_bin, k), dtype='i8') + counts = np.zeros((max_bin, k), dtype="i8") with nogil: for i in range(n): for j in range(k): @@ -900,7 +900,7 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, counts[labels[i], j] += 1 else: # axis == 1 - counts = np.zeros((n, max_bin), dtype='i8') + counts = np.zeros((n, max_bin), dtype="i8") with nogil: for i in range(n): for j in range(k): @@ -1051,7 +1051,7 @@ cpdef bint is_decimal(object obj): cpdef bint is_interval(object obj): - return getattr(obj, '_typ', '_typ') == 'interval' + return getattr(obj, "_typ", "_typ") == "interval" def is_period(val: object) -> bool: @@ -1163,17 +1163,17 @@ _TYPE_MAP = { # types only exist on certain platform try: np.float128 - _TYPE_MAP['float128'] = 'floating' + _TYPE_MAP["float128"] = "floating" except AttributeError: pass try: np.complex256 - _TYPE_MAP['complex256'] = 'complex' + _TYPE_MAP["complex256"] = "complex" except AttributeError: pass try: np.float16 - _TYPE_MAP['float16'] = 'floating' + _TYPE_MAP["float16"] = "floating" except AttributeError: pass @@ -1921,7 +1921,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: for i in range(n): base_val = values[i] if base_val is not NaT and base_val is not None and not util.is_nan(base_val): - base_tz = getattr(base_val, 'tzinfo', None) + base_tz = getattr(base_val, "tzinfo", None) break for j in range(i, n): @@ -1929,7 +1929,7 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: # NaT can coexist with tz-aware datetimes, so skip if encountered val = values[j] if val is not NaT and val is not None and not util.is_nan(val): - tz = getattr(val, 'tzinfo', None) + tz = getattr(val, "tzinfo", None) if not tz_compare(base_tz, tz): return False @@ -2133,7 +2133,7 @@ def maybe_convert_numeric( returns a boolean mask for the converted values, otherwise returns None. 
""" if len(values) == 0: - return (np.array([], dtype='i8'), None) + return (np.array([], dtype="i8"), None) # fastpath for ints - try to convert all based on first value cdef: @@ -2141,7 +2141,7 @@ def maybe_convert_numeric( if util.is_integer_object(val): try: - maybe_ints = values.astype('i8') + maybe_ints = values.astype("i8") if (maybe_ints == values).all(): return (maybe_ints, None) except (ValueError, OverflowError, TypeError): @@ -2231,7 +2231,7 @@ def maybe_convert_numeric( mask[i] = 1 seen.saw_null() floats[i] = complexes[i] = NaN - elif hasattr(val, '__len__') and len(val) == 0: + elif hasattr(val, "__len__") and len(val) == 0: if convert_empty or seen.coerce_numeric: seen.saw_null() floats[i] = complexes[i] = NaN @@ -2469,7 +2469,7 @@ def maybe_convert_objects(ndarray[object] objects, # if we have an tz's attached then return the objects if convert_datetime: - if getattr(val, 'tzinfo', None) is not None: + if getattr(val, "tzinfo", None) is not None: seen.datetimetz_ = True break else: @@ -2900,11 +2900,11 @@ def fast_multiget(dict mapping, ndarray keys, default=np.nan) -> np.ndarray: cdef: Py_ssize_t i, n = len(keys) object val - ndarray[object] output = np.empty(n, dtype='O') + ndarray[object] output = np.empty(n, dtype="O") if n == 0: # kludge, for Series - return np.empty(0, dtype='f8') + return np.empty(0, dtype="f8") for i in range(n): val = keys[i] diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index 308756e378dde..478e7eaee90c1 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -66,7 +66,7 @@ def scalar_compare(object[:] values, object val, object op) -> ndarray: elif op is operator.ne: flag = Py_NE else: - raise ValueError('Unrecognized operator') + raise ValueError("Unrecognized operator") result = np.empty(n, dtype=bool).view(np.uint8) isnull_val = checknull(val) @@ -134,7 +134,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr int flag if n != len(right): - raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') + raise ValueError(f"Arrays were different lengths: {n} vs {len(right)}") if op is operator.lt: flag = Py_LT @@ -149,7 +149,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr elif op is operator.ne: flag = Py_NE else: - raise ValueError('Unrecognized operator') + raise ValueError("Unrecognized operator") result = np.empty(n, dtype=bool).view(np.uint8) @@ -234,7 +234,7 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray: object[::1] result if n != len(right): - raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') + raise ValueError(f"Arrays were different lengths: {n} vs {len(right)}") result = np.empty(n, dtype=object) @@ -271,8 +271,8 @@ def maybe_convert_bool(ndarray[object] arr, result = np.empty(n, dtype=np.uint8) mask = np.zeros(n, dtype=np.uint8) # the defaults - true_vals = {'True', 'TRUE', 'true'} - false_vals = {'False', 'FALSE', 'false'} + true_vals = {"True", "TRUE", "true"} + false_vals = {"False", "FALSE", "false"} if true_values is not None: true_vals = true_vals | set(true_values) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 85d74e201d5bb..73005c7b5cfa0 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -342,7 +342,7 @@ cdef class TextReader: set unnamed_cols # set[str] def __cinit__(self, source, - delimiter=b',', # bytes | str + delimiter=b",", # bytes | str header=0, int64_t header_start=0, uint64_t header_end=0, @@ -358,7 +358,7 @@ cdef class 
TextReader: quoting=0, # int lineterminator=None, # bytes | str comment=None, - decimal=b'.', # bytes | str + decimal=b".", # bytes | str thousands=None, # bytes | str dtype=None, usecols=None, @@ -403,7 +403,7 @@ cdef class TextReader: self.parser.delim_whitespace = delim_whitespace else: if len(delimiter) > 1: - raise ValueError('only length-1 separators excluded right now') + raise ValueError("only length-1 separators excluded right now") self.parser.delimiter = ord(delimiter) # ---------------------------------------- @@ -415,26 +415,26 @@ cdef class TextReader: if lineterminator is not None: if len(lineterminator) != 1: - raise ValueError('Only length-1 line terminators supported') + raise ValueError("Only length-1 line terminators supported") self.parser.lineterminator = ord(lineterminator) if len(decimal) != 1: - raise ValueError('Only length-1 decimal markers supported') + raise ValueError("Only length-1 decimal markers supported") self.parser.decimal = ord(decimal) if thousands is not None: if len(thousands) != 1: - raise ValueError('Only length-1 thousands markers supported') + raise ValueError("Only length-1 thousands markers supported") self.parser.thousands = ord(thousands) if escapechar is not None: if len(escapechar) != 1: - raise ValueError('Only length-1 escapes supported') + raise ValueError("Only length-1 escapes supported") self.parser.escapechar = ord(escapechar) self._set_quoting(quotechar, quoting) - dtype_order = ['int64', 'float64', 'bool', 'object'] + dtype_order = ["int64", "float64", "bool", "object"] if quoting == QUOTE_NONNUMERIC: # consistent with csv module semantics, cast all to float dtype_order = dtype_order[1:] @@ -442,7 +442,7 @@ cdef class TextReader: if comment is not None: if len(comment) > 1: - raise ValueError('Only length-1 comment characters supported') + raise ValueError("Only length-1 comment characters supported") self.parser.commentchar = ord(comment) self.parser.on_bad_lines = on_bad_lines @@ -491,8 +491,8 @@ cdef class TextReader: elif float_precision == "high" or float_precision is None: self.parser.double_converter = precise_xstrtod else: - raise ValueError(f'Unrecognized float_precision option: ' - f'{float_precision}') + raise ValueError(f"Unrecognized float_precision option: " + f"{float_precision}") # Caller is responsible for ensuring we have one of # - None @@ -582,7 +582,7 @@ cdef class TextReader: dtype = type(quote_char).__name__ raise TypeError(f'"quotechar" must be string, not {dtype}') - if quote_char is None or quote_char == '': + if quote_char is None or quote_char == "": if quoting != QUOTE_NONE: raise TypeError("quotechar must be set if quoting enabled") self.parser.quoting = quoting @@ -647,11 +647,11 @@ cdef class TextReader: self.parser.lines < hr): msg = self.orig_header if isinstance(msg, list): - joined = ','.join(str(m) for m in msg) + joined = ",".join(str(m) for m in msg) msg = f"[{joined}], len of {len(msg)}," raise ParserError( - f'Passed header={msg} but only ' - f'{self.parser.lines} lines in file') + f"Passed header={msg} but only " + f"{self.parser.lines} lines in file") else: field_count = self.parser.line_fields[hr] @@ -666,11 +666,11 @@ cdef class TextReader: name = PyUnicode_DecodeUTF8(word, strlen(word), self.encoding_errors) - if name == '': + if name == "": if self.has_mi_columns: - name = f'Unnamed: {i}_level_{level}' + name = f"Unnamed: {i}_level_{level}" else: - name = f'Unnamed: {i}' + name = f"Unnamed: {i}" unnamed_count += 1 unnamed_col_indices.append(i) @@ -693,7 +693,7 @@ cdef class 
TextReader: if cur_count > 0: while cur_count > 0: counts[old_col] = cur_count + 1 - col = f'{old_col}.{cur_count}' + col = f"{old_col}.{cur_count}" if col in this_header: cur_count += 1 else: @@ -779,8 +779,8 @@ cdef class TextReader: elif self.names is None and nuse < passed_count: self.leading_cols = field_count - passed_count elif passed_count != field_count: - raise ValueError('Number of passed names did not match number of ' - 'header fields in the file') + raise ValueError("Number of passed names did not match number of " + "header fields in the file") # oh boy, #2442, #2981 elif self.allow_leading_cols and passed_count < field_count: self.leading_cols = field_count - passed_count @@ -854,7 +854,7 @@ cdef class TextReader: self.parser.warn_msg = NULL if status < 0: - raise_parser_error('Error tokenizing data', self.parser) + raise_parser_error("Error tokenizing data", self.parser) # -> dict[int, "ArrayLike"] cdef _read_rows(self, rows, bint trim): @@ -871,8 +871,8 @@ cdef class TextReader: self._tokenize_rows(irows - buffered_lines) if self.skipfooter > 0: - raise ValueError('skipfooter can only be used to read ' - 'the whole file') + raise ValueError("skipfooter can only be used to read " + "the whole file") else: with nogil: status = tokenize_all_rows(self.parser, self.encoding_errors) @@ -885,15 +885,15 @@ cdef class TextReader: self.parser.warn_msg = NULL if status < 0: - raise_parser_error('Error tokenizing data', self.parser) + raise_parser_error("Error tokenizing data", self.parser) if self.parser_start >= self.parser.lines: raise StopIteration - self._end_clock('Tokenization') + self._end_clock("Tokenization") self._start_clock() columns = self._convert_column_data(rows) - self._end_clock('Type conversion') + self._end_clock("Type conversion") self._start_clock() if len(columns) > 0: rows_read = len(list(columns.values())[0]) @@ -903,7 +903,7 @@ cdef class TextReader: parser_trim_buffers(self.parser) self.parser_start -= rows_read - self._end_clock('Parser memory cleanup') + self._end_clock("Parser memory cleanup") return columns @@ -913,7 +913,7 @@ cdef class TextReader: cdef _end_clock(self, str what): if self.verbose: elapsed = time.time() - self.clocks.pop(-1) - print(f'{what} took: {elapsed * 1000:.2f} ms') + print(f"{what} took: {elapsed * 1000:.2f} ms") def set_noconvert(self, i: int) -> None: self.noconvert.add(i) @@ -1060,7 +1060,7 @@ cdef class TextReader: ) if col_res is None: - raise ParserError(f'Unable to parse column {i}') + raise ParserError(f"Unable to parse column {i}") results[i] = col_res @@ -1098,11 +1098,11 @@ cdef class TextReader: # dtype successfully. As a result, we leave the data # column AS IS with object dtype. col_res, na_count = self._convert_with_dtype( - np.dtype('object'), i, start, end, 0, + np.dtype("object"), i, start, end, 0, 0, na_hashset, na_flist) except OverflowError: col_res, na_count = self._convert_with_dtype( - np.dtype('object'), i, start, end, na_filter, + np.dtype("object"), i, start, end, na_filter, 0, na_hashset, na_flist) if col_res is not None: @@ -1131,7 +1131,7 @@ cdef class TextReader: # only allow safe casts, eg. 
with a nan you cannot safely cast to int try: - col_res = col_res.astype(col_dtype, casting='safe') + col_res = col_res.astype(col_dtype, casting="safe") except TypeError: # float -> int conversions can fail the above @@ -1200,7 +1200,7 @@ cdef class TextReader: na_filter, na_hashset) na_count = 0 - if result is not None and dtype != 'int64': + if result is not None and dtype != "int64": result = result.astype(dtype) return result, na_count @@ -1209,7 +1209,7 @@ cdef class TextReader: result, na_count = _try_double(self.parser, i, start, end, na_filter, na_hashset, na_flist) - if result is not None and dtype != 'float64': + if result is not None and dtype != "float64": result = result.astype(dtype) return result, na_count elif is_bool_dtype(dtype): @@ -1221,7 +1221,7 @@ cdef class TextReader: raise ValueError(f"Bool column has NA values in column {i}") return result, na_count - elif dtype.kind == 'S': + elif dtype.kind == "S": # TODO: na handling width = dtype.itemsize if width > 0: @@ -1231,7 +1231,7 @@ cdef class TextReader: # treat as a regular string parsing return self._string_convert(i, start, end, na_filter, na_hashset) - elif dtype.kind == 'U': + elif dtype.kind == "U": width = dtype.itemsize if width > 0: raise TypeError(f"the dtype {dtype} is not supported for parsing") @@ -1345,8 +1345,8 @@ cdef _close(TextReader reader): cdef: - object _true_values = [b'True', b'TRUE', b'true'] - object _false_values = [b'False', b'FALSE', b'false'] + object _true_values = [b"True", b"TRUE", b"true"] + object _false_values = [b"False", b"FALSE", b"false"] def _ensure_encoded(list lst): @@ -1356,7 +1356,7 @@ def _ensure_encoded(list lst): if isinstance(x, str): x = PyUnicode_AsUTF8String(x) elif not isinstance(x, bytes): - x = str(x).encode('utf-8') + x = str(x).encode("utf-8") result.append(x) return result @@ -1565,7 +1565,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, char *data ndarray result - result = np.empty(line_end - line_start, dtype=f'|S{width}') + result = np.empty(line_end - line_start, dtype=f"|S{width}") data = result.data with nogil: @@ -1591,13 +1591,13 @@ cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col, cdef: - char* cinf = b'inf' - char* cposinf = b'+inf' - char* cneginf = b'-inf' + char* cinf = b"inf" + char* cposinf = b"+inf" + char* cneginf = b"-inf" - char* cinfty = b'Infinity' - char* cposinfty = b'+Infinity' - char* cneginfty = b'-Infinity' + char* cinfty = b"Infinity" + char* cposinfty = b"+Infinity" + char* cneginfty = b"-Infinity" # -> tuple[ndarray[float64_t], int] | tuple[None, None] @@ -1726,14 +1726,14 @@ cdef _try_uint64(parser_t *parser, int64_t col, if error != 0: if error == ERROR_OVERFLOW: # Can't get the word variable - raise OverflowError('Overflow') + raise OverflowError("Overflow") return None if uint64_conflict(&state): - raise ValueError('Cannot convert to numerical dtype') + raise ValueError("Cannot convert to numerical dtype") if state.seen_sint: - raise OverflowError('Overflow') + raise OverflowError("Overflow") return result @@ -1796,7 +1796,7 @@ cdef _try_int64(parser_t *parser, int64_t col, if error != 0: if error == ERROR_OVERFLOW: # Can't get the word variable - raise OverflowError('Overflow') + raise OverflowError("Overflow") return None, None return result, na_count @@ -1944,7 +1944,7 @@ cdef kh_str_starts_t* kset_from_list(list values) except NULL: # None creeps in sometimes, which isn't possible here if not isinstance(val, bytes): kh_destroy_str_starts(table) - raise ValueError('Must be all 
encoded bytes') + raise ValueError("Must be all encoded bytes") kh_put_str_starts_item(table, PyBytes_AsString(val), &ret) @@ -2009,11 +2009,11 @@ cdef raise_parser_error(object base, parser_t *parser): Py_XDECREF(type) raise old_exc - message = f'{base}. C error: ' + message = f"{base}. C error: " if parser.error_msg != NULL: - message += parser.error_msg.decode('utf-8') + message += parser.error_msg.decode("utf-8") else: - message += 'no error message set' + message += "no error message set" raise ParserError(message) @@ -2078,7 +2078,7 @@ cdef _apply_converter(object f, parser_t *parser, int64_t col, cdef list _maybe_encode(list values): if values is None: return [] - return [x.encode('utf-8') if isinstance(x, str) else x for x in values] + return [x.encode("utf-8") if isinstance(x, str) else x for x in values] def sanitize_objects(ndarray[object] values, set na_values) -> int: diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 3354290a5f535..33cd2ef27a995 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -14,7 +14,7 @@ cdef class CachedProperty: def __init__(self, fget): self.fget = fget self.name = fget.__name__ - self.__doc__ = getattr(fget, '__doc__', None) + self.__doc__ = getattr(fget, "__doc__", None) def __get__(self, obj, typ): if obj is None: @@ -22,7 +22,7 @@ cdef class CachedProperty: return self # Get the cache or set a default one if needed - cache = getattr(obj, '_cache', None) + cache = getattr(obj, "_cache", None) if cache is None: try: cache = obj._cache = {} diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index a012bd92cd573..946ba5ddaa248 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -103,7 +103,7 @@ def explode(ndarray[object] values): # find the resulting len n = len(values) - counts = np.zeros(n, dtype='int64') + counts = np.zeros(n, dtype="int64") for i in range(n): v = values[i] @@ -116,7 +116,7 @@ def explode(ndarray[object] values): else: counts[i] += 1 - result = np.empty(counts.sum(), dtype='object') + result = np.empty(counts.sum(), dtype="object") count = 0 for i in range(n): v = values[i] diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 031417fa50be0..45ddade7b4eb5 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -62,8 +62,8 @@ cdef class IntIndex(SparseIndex): return IntIndex, args def __repr__(self) -> str: - output = 'IntIndex\n' - output += f'Indices: {repr(self.indices)}\n' + output = "IntIndex\n" + output += f"Indices: {repr(self.indices)}\n" return output @property @@ -134,7 +134,7 @@ cdef class IntIndex(SparseIndex): y = y_.to_int_index() if self.length != y.length: - raise Exception('Indices must reference same underlying length') + raise Exception("Indices must reference same underlying length") xindices = self.indices yindices = y.indices @@ -168,7 +168,7 @@ cdef class IntIndex(SparseIndex): y = y_.to_int_index() if self.length != y.length: - raise ValueError('Indices must reference same underlying length') + raise ValueError("Indices must reference same underlying length") new_indices = np.union1d(self.indices, y.indices) return IntIndex(self.length, new_indices) @@ -311,9 +311,9 @@ cdef class BlockIndex(SparseIndex): return BlockIndex, args def __repr__(self) -> str: - output = 'BlockIndex\n' - output += f'Block locations: {repr(self.blocs)}\n' - output += f'Block lengths: {repr(self.blengths)}' + output = "BlockIndex\n" + output += f"Block locations: {repr(self.blocs)}\n" + output += f"Block lengths: 
{repr(self.blengths)}" return output @@ -340,23 +340,23 @@ cdef class BlockIndex(SparseIndex): blengths = self.blengths if len(blocs) != len(blengths): - raise ValueError('block bound arrays must be same length') + raise ValueError("block bound arrays must be same length") for i in range(self.nblocks): if i > 0: if blocs[i] <= blocs[i - 1]: - raise ValueError('Locations not in ascending order') + raise ValueError("Locations not in ascending order") if i < self.nblocks - 1: if blocs[i] + blengths[i] > blocs[i + 1]: - raise ValueError(f'Block {i} overlaps') + raise ValueError(f"Block {i} overlaps") else: if blocs[i] + blengths[i] > self.length: - raise ValueError(f'Block {i} extends beyond end') + raise ValueError(f"Block {i} extends beyond end") # no zero-length blocks if blengths[i] == 0: - raise ValueError(f'Zero-length block {i}') + raise ValueError(f"Zero-length block {i}") def equals(self, other: object) -> bool: if not isinstance(other, BlockIndex): @@ -411,7 +411,7 @@ cdef class BlockIndex(SparseIndex): y = other.to_block_index() if self.length != y.length: - raise Exception('Indices must reference same underlying length') + raise Exception("Indices must reference same underlying length") xloc = self.blocs xlen = self.blengths @@ -565,7 +565,7 @@ cdef class BlockMerge: self.y = y if x.length != y.length: - raise Exception('Indices must reference same underlying length') + raise Exception("Indices must reference same underlying length") self.xstart = self.x.blocs self.ystart = self.y.blocs @@ -660,7 +660,7 @@ cdef class BlockUnion(BlockMerge): int32_t xi, yi, ynblocks, nend if mode != 0 and mode != 1: - raise Exception('Mode must be 0 or 1') + raise Exception("Mode must be 0 or 1") # so symmetric code will work if mode == 0: diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index b7457f94f3447..733879154b9d6 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -21,15 +21,15 @@ from pandas.core.dtypes.missing import ( cdef bint isiterable(obj): - return hasattr(obj, '__iter__') + return hasattr(obj, "__iter__") cdef bint has_length(obj): - return hasattr(obj, '__len__') + return hasattr(obj, "__len__") cdef bint is_dictlike(obj): - return hasattr(obj, 'keys') and hasattr(obj, '__getitem__') + return hasattr(obj, "keys") and hasattr(obj, "__getitem__") cpdef assert_dict_equal(a, b, bint compare_keys=True): @@ -91,7 +91,7 @@ cpdef assert_almost_equal(a, b, Py_ssize_t i, na, nb double fa, fb bint is_unequal = False, a_is_ndarray, b_is_ndarray - str first_diff = '' + str first_diff = "" if lobj is None: lobj = a @@ -110,9 +110,9 @@ cpdef assert_almost_equal(a, b, if obj is None: if a_is_ndarray or b_is_ndarray: - obj = 'numpy array' + obj = "numpy array" else: - obj = 'Iterable' + obj = "Iterable" if isiterable(a): @@ -131,11 +131,11 @@ cpdef assert_almost_equal(a, b, if a.shape != b.shape: from pandas._testing import raise_assert_detail raise_assert_detail( - obj, f'{obj} shapes are different', a.shape, b.shape) + obj, f"{obj} shapes are different", a.shape, b.shape) if check_dtype and not is_dtype_equal(a.dtype, b.dtype): from pandas._testing import assert_attr_equal - assert_attr_equal('dtype', a, b, obj=obj) + assert_attr_equal("dtype", a, b, obj=obj) if array_equivalent(a, b, strict_nan=True): return True diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7fee48c0a5d1f..b78174483be51 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -86,9 +86,9 @@ def _test_parse_iso8601(ts: str): obj = _TSObject() - if ts == 'now': + 
if ts == "now": return Timestamp.utcnow() - elif ts == 'today': + elif ts == "today": return Timestamp.now().normalize() string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True) @@ -145,7 +145,7 @@ def format_array_from_datetime( cnp.flatiter it = cnp.PyArray_IterNew(values) if na_rep is None: - na_rep = 'NaT' + na_rep = "NaT" if tz is None: # if we don't have a format nor tz, then choose @@ -182,21 +182,21 @@ def format_array_from_datetime( elif basic_format_day: pandas_datetime_to_datetimestruct(val, reso, &dts) - res = f'{dts.year}-{dts.month:02d}-{dts.day:02d}' + res = f"{dts.year}-{dts.month:02d}-{dts.day:02d}" elif basic_format: pandas_datetime_to_datetimestruct(val, reso, &dts) - res = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} ' - f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}') + res = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " + f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") if show_ns: ns = dts.ps // 1000 - res += f'.{ns + dts.us * 1000:09d}' + res += f".{ns + dts.us * 1000:09d}" elif show_us: - res += f'.{dts.us:06d}' + res += f".{dts.us:06d}" elif show_ms: - res += f'.{dts.us // 1000:03d}' + res += f".{dts.us // 1000:03d}" else: @@ -266,9 +266,9 @@ def array_with_unit_to_datetime( int64_t mult int prec = 0 ndarray[float64_t] fvalues - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' - bint is_raise = errors=='raise' + bint is_ignore = errors=="ignore" + bint is_coerce = errors=="coerce" + bint is_raise = errors=="raise" bint need_to_iterate = True ndarray[int64_t] iresult ndarray[object] oresult @@ -324,8 +324,8 @@ def array_with_unit_to_datetime( return result, tz - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') + result = np.empty(n, dtype="M8[ns]") + iresult = result.view("i8") try: for i in range(n): @@ -442,7 +442,7 @@ def first_non_null(values: ndarray) -> int: @cython.boundscheck(False) cpdef array_to_datetime( ndarray[object] values, - str errors='raise', + str errors="raise", bint dayfirst=False, bint yearfirst=False, bint utc=False, @@ -494,9 +494,9 @@ cpdef array_to_datetime( bint seen_integer = False bint seen_datetime = False bint seen_datetime_offset = False - bint is_raise = errors=='raise' - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' + bint is_raise = errors=="raise" + bint is_ignore = errors=="ignore" + bint is_coerce = errors=="coerce" bint is_same_offsets _TSObject _ts int64_t value @@ -511,8 +511,8 @@ cpdef array_to_datetime( # specify error conditions assert is_raise or is_ignore or is_coerce - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') + result = np.empty(n, dtype="M8[ns]") + iresult = result.view("i8") try: for i in range(n): @@ -571,7 +571,7 @@ cpdef array_to_datetime( # if we have previously (or in future accept # datetimes/strings, then we must coerce) try: - iresult[i] = cast_from_unit(val, 'ns') + iresult[i] = cast_from_unit(val, "ns") except OverflowError: iresult[i] = NPY_NAT @@ -632,7 +632,7 @@ cpdef array_to_datetime( else: # Add a marker for naive string, to track if we are # parsing mixed naive and aware strings - out_tzoffset_vals.add('naive') + out_tzoffset_vals.add("naive") _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value @@ -653,7 +653,7 @@ cpdef array_to_datetime( else: # Add a marker for naive string, to track if we are # parsing mixed naive and aware strings - out_tzoffset_vals.add('naive') + out_tzoffset_vals.add("naive") iresult[i] = value check_dts_bounds(&dts) @@ -791,9 +791,9 @@ cdef 
_array_to_datetime_object( cdef: Py_ssize_t i, n = len(values) object val - bint is_ignore = errors == 'ignore' - bint is_coerce = errors == 'coerce' - bint is_raise = errors == 'raise' + bint is_ignore = errors == "ignore" + bint is_coerce = errors == "coerce" + bint is_raise = errors == "raise" ndarray[object] oresult npy_datetimestruct dts @@ -816,7 +816,7 @@ cdef _array_to_datetime_object( val = str(val) if len(val) == 0 or val in nat_strings: - oresult[i] = 'NaT' + oresult[i] = "NaT" continue try: oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index 00ee15b73f551..19c732e2a313b 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -29,21 +29,21 @@ cdef int32_t* month_offset = [ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] # Canonical location for other modules to find name constants -MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', - 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] +MONTHS = ["JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", + "AUG", "SEP", "OCT", "NOV", "DEC"] # The first blank line is consistent with calendar.month_name in the calendar # standard library -MONTHS_FULL = ['', 'January', 'February', 'March', 'April', 'May', 'June', - 'July', 'August', 'September', 'October', 'November', - 'December'] +MONTHS_FULL = ["", "January", "February", "March", "April", "May", "June", + "July", "August", "September", "October", "November", + "December"] MONTH_NUMBERS = {name: num for num, name in enumerate(MONTHS)} cdef dict c_MONTH_NUMBERS = MONTH_NUMBERS MONTH_ALIASES = {(num + 1): name for num, name in enumerate(MONTHS)} MONTH_TO_CAL_NUM = {name: num + 1 for num, name in enumerate(MONTHS)} -DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] -DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', - 'Saturday', 'Sunday'] +DAYS = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"] +DAYS_FULL = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", + "Saturday", "Sunday"] int_to_weekday = {num: name for num, name in enumerate(DAYS)} weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 17facf9e16f4b..1b6dace6e90b1 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -76,8 +76,8 @@ from pandas._libs.tslibs.tzconversion cimport ( # ---------------------------------------------------------------------- # Constants -DT64NS_DTYPE = np.dtype('M8[ns]') -TD64NS_DTYPE = np.dtype('m8[ns]') +DT64NS_DTYPE = np.dtype("M8[ns]") +TD64NS_DTYPE = np.dtype("m8[ns]") # ---------------------------------------------------------------------- @@ -315,8 +315,8 @@ cdef _TSObject convert_to_tsobject(object ts, tzinfo tz, str unit, if isinstance(ts, Period): raise ValueError("Cannot convert Period to Timestamp " "unambiguously. 
Use to_timestamp") - raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to ' - f'Timestamp') + raise TypeError(f"Cannot convert input [{ts}] of type {type(ts)} to " + f"Timestamp") maybe_localize_tso(obj, tz, obj.creso) return obj @@ -497,11 +497,11 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, obj.value = NPY_NAT obj.tzinfo = tz return obj - elif ts == 'now': + elif ts == "now": # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns utc dt = datetime.now(tz) - elif ts == 'today': + elif ts == "today": # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns a normalized datetime dt = datetime.now(tz) @@ -702,19 +702,19 @@ cdef tzinfo convert_timezone( if utc_convert: pass elif found_naive: - raise ValueError('Tz-aware datetime.datetime ' - 'cannot be converted to ' - 'datetime64 unless utc=True') + raise ValueError("Tz-aware datetime.datetime " + "cannot be converted to " + "datetime64 unless utc=True") elif tz_out is not None and not tz_compare(tz_out, tz_in): - raise ValueError('Tz-aware datetime.datetime ' - 'cannot be converted to ' - 'datetime64 unless utc=True') + raise ValueError("Tz-aware datetime.datetime " + "cannot be converted to " + "datetime64 unless utc=True") else: tz_out = tz_in else: if found_tz and not utc_convert: - raise ValueError('Cannot mix tz-aware with ' - 'tz-naive values') + raise ValueError("Cannot mix tz-aware with " + "tz-naive values") return tz_out diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index dda26ad3bebc6..7e5d1d13cbda3 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -73,13 +73,13 @@ def build_field_sarray(const int64_t[:] dtindex, NPY_DATETIMEUNIT reso): out = np.empty(count, dtype=sa_dtype) - years = out['Y'] - months = out['M'] - days = out['D'] - hours = out['h'] - minutes = out['m'] - seconds = out['s'] - mus = out['u'] + years = out["Y"] + months = out["M"] + days = out["D"] + hours = out["h"] + minutes = out["m"] + seconds = out["s"] + mus = out["u"] for i in range(count): pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) @@ -154,11 +154,11 @@ def get_date_name_field( out = np.empty(count, dtype=object) - if field == 'day_name': + if field == "day_name": if locale is None: names = np.array(DAYS_FULL, dtype=np.object_) else: - names = np.array(_get_locale_names('f_weekday', locale), + names = np.array(_get_locale_names("f_weekday", locale), dtype=np.object_) for i in range(count): if dtindex[i] == NPY_NAT: @@ -169,11 +169,11 @@ def get_date_name_field( dow = dayofweek(dts.year, dts.month, dts.day) out[i] = names[dow].capitalize() - elif field == 'month_name': + elif field == "month_name": if locale is None: names = np.array(MONTHS_FULL, dtype=np.object_) else: - names = np.array(_get_locale_names('f_month', locale), + names = np.array(_get_locale_names("f_month", locale), dtype=np.object_) for i in range(count): if dtindex[i] == NPY_NAT: @@ -237,20 +237,20 @@ def get_start_end_field( npy_datetimestruct dts int compare_month, modby - out = np.zeros(count, dtype='int8') + out = np.zeros(count, dtype="int8") if freqstr: - if freqstr == 'C': + if freqstr == "C": raise ValueError(f"Custom business days is not supported by {field}") - is_business = freqstr[0] == 'B' + is_business = freqstr[0] == "B" # YearBegin(), BYearBegin() use month = starting month of year. # QuarterBegin(), BQuarterBegin() use startingMonth = starting # month of year. 
Other offsets use month, startingMonth as ending # month of year. - if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( - freqstr[1:3] in ['MS', 'QS', 'AS']): + if (freqstr[0:2] in ["MS", "QS", "AS"]) or ( + freqstr[1:3] in ["MS", "QS", "AS"]): end_month = 12 if month_kw == 1 else month_kw - 1 start_month = month_kw else: @@ -339,9 +339,9 @@ def get_date_field( ndarray[int32_t] out npy_datetimestruct dts - out = np.empty(count, dtype='i4') + out = np.empty(count, dtype="i4") - if field == 'Y': + if field == "Y": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -352,7 +352,7 @@ def get_date_field( out[i] = dts.year return out - elif field == 'M': + elif field == "M": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -363,7 +363,7 @@ def get_date_field( out[i] = dts.month return out - elif field == 'D': + elif field == "D": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -374,7 +374,7 @@ def get_date_field( out[i] = dts.day return out - elif field == 'h': + elif field == "h": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -386,7 +386,7 @@ def get_date_field( # TODO: can we de-dup with period.pyx s? return out - elif field == 'm': + elif field == "m": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -397,7 +397,7 @@ def get_date_field( out[i] = dts.min return out - elif field == 's': + elif field == "s": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -408,7 +408,7 @@ def get_date_field( out[i] = dts.sec return out - elif field == 'us': + elif field == "us": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -419,7 +419,7 @@ def get_date_field( out[i] = dts.us return out - elif field == 'ns': + elif field == "ns": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -429,7 +429,7 @@ def get_date_field( pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = dts.ps // 1000 return out - elif field == 'doy': + elif field == "doy": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -440,7 +440,7 @@ def get_date_field( out[i] = get_day_of_year(dts.year, dts.month, dts.day) return out - elif field == 'dow': + elif field == "dow": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -451,7 +451,7 @@ def get_date_field( out[i] = dayofweek(dts.year, dts.month, dts.day) return out - elif field == 'woy': + elif field == "woy": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -462,7 +462,7 @@ def get_date_field( out[i] = get_week_of_year(dts.year, dts.month, dts.day) return out - elif field == 'q': + elif field == "q": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -474,7 +474,7 @@ def get_date_field( out[i] = ((out[i] - 1) // 3) + 1 return out - elif field == 'dim': + elif field == "dim": with nogil: for i in range(count): if dtindex[i] == NPY_NAT: @@ -484,8 +484,8 @@ def get_date_field( pandas_datetime_to_datetimestruct(dtindex[i], reso, &dts) out[i] = get_days_in_month(dts.year, dts.month) return out - elif field == 'is_leap_year': - return isleapyear_arr(get_date_field(dtindex, 'Y', reso=reso)) + elif field == "is_leap_year": + return isleapyear_arr(get_date_field(dtindex, "Y", reso=reso)) raise ValueError(f"Field {field} not supported") @@ -506,9 +506,9 @@ def get_timedelta_field( ndarray[int32_t] out pandas_timedeltastruct tds - out = np.empty(count, dtype='i4') + out = np.empty(count, dtype="i4") - if field == 'days': + if field == "days": with nogil: for i in range(count): if tdindex[i] == NPY_NAT: @@ -519,7 +519,7 @@ def 
get_timedelta_field( out[i] = tds.days return out - elif field == 'seconds': + elif field == "seconds": with nogil: for i in range(count): if tdindex[i] == NPY_NAT: @@ -530,7 +530,7 @@ def get_timedelta_field( out[i] = tds.seconds return out - elif field == 'microseconds': + elif field == "microseconds": with nogil: for i in range(count): if tdindex[i] == NPY_NAT: @@ -541,7 +541,7 @@ def get_timedelta_field( out[i] = tds.microseconds return out - elif field == 'nanoseconds': + elif field == "nanoseconds": with nogil: for i in range(count): if tdindex[i] == NPY_NAT: @@ -560,7 +560,7 @@ cpdef isleapyear_arr(ndarray years): cdef: ndarray[int8_t] out - out = np.zeros(len(years), dtype='int8') + out = np.zeros(len(years), dtype="int8") out[np.logical_or(years % 400 == 0, np.logical_and(years % 4 == 0, years % 100 > 0))] = 1 diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index dcb7358d8e69a..1f18f8cae4ae8 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -259,7 +259,7 @@ cdef class _NaT(datetime): """ Return a numpy.datetime64 object with 'ns' precision. """ - return np.datetime64('NaT', "ns") + return np.datetime64("NaT", "ns") def to_numpy(self, dtype=None, copy=False) -> np.datetime64 | np.timedelta64: """ diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index d49c41e54764f..e5f683c56da9b 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -211,10 +211,10 @@ cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns): error = True if error: - fmt = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} ' - f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}') + fmt = (f"{dts.year}-{dts.month:02d}-{dts.day:02d} " + f"{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}") # TODO: "nanosecond" in the message assumes NPY_FR_ns - raise OutOfBoundsDatetime(f'Out of bounds nanosecond timestamp: {fmt}') + raise OutOfBoundsDatetime(f"Out of bounds nanosecond timestamp: {fmt}") # ---------------------------------------------------------------------- @@ -289,7 +289,7 @@ cdef inline int string_to_dts( buf = get_c_string_buf_and_size(val, &length) if format is None: - format_buf = b'' + format_buf = b"" format_length = 0 exact = False else: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 4c6493652b216..97554556b0082 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -240,9 +240,9 @@ cdef _get_calendar(weekmask, holidays, calendar): holidays = [_to_dt64D(dt) for dt in holidays] holidays = tuple(sorted(holidays)) - kwargs = {'weekmask': weekmask} + kwargs = {"weekmask": weekmask} if holidays: - kwargs['holidays'] = holidays + kwargs["holidays"] = holidays busdaycalendar = np.busdaycalendar(**kwargs) return busdaycalendar, holidays @@ -253,7 +253,7 @@ cdef _to_dt64D(dt): # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') # numpy.datetime64('2013-05-01T02:00:00.000000+0200') # Thus astype is needed to cast datetime to datetime64[D] - if getattr(dt, 'tzinfo', None) is not None: + if getattr(dt, "tzinfo", None) is not None: # Get the nanosecond timestamp, # equiv `Timestamp(dt).value` or `dt.timestamp() * 10**9` # The `naive` must be the `dt` naive wall time @@ -274,7 +274,7 @@ cdef _to_dt64D(dt): cdef _validate_business_time(t_input): if isinstance(t_input, str): try: - t = time.strptime(t_input, '%H:%M') + t = time.strptime(t_input, "%H:%M") return dt_time(hour=t.tm_hour, minute=t.tm_min) 
except ValueError: raise ValueError("time data must match '%H:%M' format") @@ -303,14 +303,14 @@ cdef _determine_offset(kwds): # more, nanosecond(s) are handled by apply_wraps kwds_no_nanos = dict( (k, v) for k, v in kwds.items() - if k not in ('nanosecond', 'nanoseconds') + if k not in ("nanosecond", "nanoseconds") ) # TODO: Are nanosecond and nanoseconds allowed somewhere? - _kwds_use_relativedelta = ('years', 'months', 'weeks', 'days', - 'year', 'month', 'week', 'day', 'weekday', - 'hour', 'minute', 'second', 'microsecond', - 'millisecond') + _kwds_use_relativedelta = ("years", "months", "weeks", "days", + "year", "month", "week", "day", "weekday", + "hour", "minute", "second", "microsecond", + "millisecond") use_relativedelta = False if len(kwds_no_nanos) > 0: @@ -327,7 +327,7 @@ cdef _determine_offset(kwds): else: # sub-daily offset - use timedelta (tz-aware) offset = timedelta(**kwds_no_nanos) - elif any(nano in kwds for nano in ('nanosecond', 'nanoseconds')): + elif any(nano in kwds for nano in ("nanosecond", "nanoseconds")): offset = timedelta(days=0) else: # GH 45643/45890: (historically) defaults to 1 day for non-nano @@ -424,11 +424,11 @@ cdef class BaseOffset: # cython attributes are not in __dict__ all_paras[attr] = getattr(self, attr) - if 'holidays' in all_paras and not all_paras['holidays']: - all_paras.pop('holidays') - exclude = ['kwds', 'name', 'calendar'] + if "holidays" in all_paras and not all_paras["holidays"]: + all_paras.pop("holidays") + exclude = ["kwds", "name", "calendar"] attrs = [(k, v) for k, v in all_paras.items() - if (k not in exclude) and (k[0] != '_')] + if (k not in exclude) and (k[0] != "_")] attrs = sorted(set(attrs)) params = tuple([str(type(self))] + attrs) return params @@ -481,7 +481,7 @@ cdef class BaseOffset: def __sub__(self, other): if PyDateTime_Check(other): - raise TypeError('Cannot subtract datetime from offset.') + raise TypeError("Cannot subtract datetime from offset.") elif type(other) == type(self): return type(self)(self.n - other.n, normalize=self.normalize, **self.kwds) @@ -736,13 +736,13 @@ cdef class BaseOffset: ValueError if n != int(n) """ if util.is_timedelta64_object(n): - raise TypeError(f'`n` argument must be an integer, got {type(n)}') + raise TypeError(f"`n` argument must be an integer, got {type(n)}") try: nint = int(n) except (ValueError, TypeError): - raise TypeError(f'`n` argument must be an integer, got {type(n)}') + raise TypeError(f"`n` argument must be an integer, got {type(n)}") if n != nint: - raise ValueError(f'`n` argument must be an integer, got {n}') + raise ValueError(f"`n` argument must be an integer, got {n}") return nint def __setstate__(self, state): @@ -1700,7 +1700,7 @@ cdef class BusinessHour(BusinessMixin): out = super()._repr_attrs() # Use python string formatting to be faster than strftime hours = ",".join( - f'{st.hour:02d}:{st.minute:02d}-{en.hour:02d}:{en.minute:02d}' + f"{st.hour:02d}:{st.minute:02d}-{en.hour:02d}:{en.minute:02d}" for st, en in zip(self.start, self.end) ) attrs = [f"{self._prefix}={hours}"] @@ -3675,7 +3675,7 @@ cdef class _CustomBusinessMonth(BusinessMixin): Define default roll function to be called in apply method. 
""" cbday_kwds = self.kwds.copy() - cbday_kwds['offset'] = timedelta(0) + cbday_kwds["offset"] = timedelta(0) cbday = CustomBusinessDay(n=1, normalize=False, **cbday_kwds) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 232169f3844b3..25a2722c48bd6 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -94,7 +94,7 @@ PARSING_WARNING_MSG = ( ) cdef: - set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} + set _not_datelike_strings = {"a", "A", "m", "M", "p", "P", "t", "T"} # ---------------------------------------------------------------------- cdef: @@ -165,38 +165,38 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): month = _parse_2digit(buf) day = _parse_2digit(buf + 3) year = _parse_4digit(buf + 6) - reso = 'day' + reso = "day" can_swap = 1 elif length == 9 and _is_delimiter(buf[1]) and _is_delimiter(buf[4]): # parsing M?DD?YYYY and D?MM?YYYY dates month = _parse_1digit(buf) day = _parse_2digit(buf + 2) year = _parse_4digit(buf + 5) - reso = 'day' + reso = "day" can_swap = 1 elif length == 9 and _is_delimiter(buf[2]) and _is_delimiter(buf[4]): # parsing MM?D?YYYY and DD?M?YYYY dates month = _parse_2digit(buf) day = _parse_1digit(buf + 3) year = _parse_4digit(buf + 5) - reso = 'day' + reso = "day" can_swap = 1 elif length == 8 and _is_delimiter(buf[1]) and _is_delimiter(buf[3]): # parsing M?D?YYYY and D?M?YYYY dates month = _parse_1digit(buf) day = _parse_1digit(buf + 2) year = _parse_4digit(buf + 4) - reso = 'day' + reso = "day" can_swap = 1 elif length == 7 and _is_delimiter(buf[2]): # parsing MM?YYYY dates - if buf[2] == b'.': + if buf[2] == b".": # we cannot reliably tell whether e.g. 10.2010 is a float # or a date, thus we refuse to parse it here return None, None month = _parse_2digit(buf) year = _parse_4digit(buf + 3) - reso = 'month' + reso = "month" else: return None, None @@ -214,16 +214,16 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): if dayfirst and not swapped_day_and_month: warnings.warn( PARSING_WARNING_MSG.format( - format='MM/DD/YYYY', - dayfirst='True', + format="MM/DD/YYYY", + dayfirst="True", ), stacklevel=find_stack_level(), ) elif not dayfirst and swapped_day_and_month: warnings.warn( PARSING_WARNING_MSG.format( - format='DD/MM/YYYY', - dayfirst='False (the default)', + format="DD/MM/YYYY", + dayfirst="False (the default)", ), stacklevel=find_stack_level(), ) @@ -255,11 +255,11 @@ cdef inline bint does_string_look_like_time(str parse_string): buf = get_c_string_buf_and_size(parse_string, &length) if length >= 4: - if buf[1] == b':': + if buf[1] == b":": # h:MM format hour = getdigit_ascii(buf[0], -1) minute = _parse_2digit(buf + 2) - elif buf[2] == b':': + elif buf[2] == b":": # HH:MM format hour = _parse_2digit(buf) minute = _parse_2digit(buf + 3) @@ -289,7 +289,7 @@ def parse_datetime_string( datetime dt if not _does_string_look_like_datetime(date_string): - raise ValueError(f'Given date string {date_string} not likely a datetime') + raise ValueError(f"Given date string {date_string} not likely a datetime") if does_string_look_like_time(date_string): # use current datetime as default, not pass _DEFAULT_DATETIME @@ -323,7 +323,7 @@ def parse_datetime_string( except TypeError: # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable - raise ValueError(f'Given date string {date_string} not likely a datetime') + raise ValueError(f"Given date string {date_string} not likely a datetime") return dt @@ 
-399,7 +399,7 @@ cdef parse_datetime_string_with_reso( int out_tzoffset if not _does_string_look_like_datetime(date_string): - raise ValueError(f'Given date string {date_string} not likely a datetime') + raise ValueError(f"Given date string {date_string} not likely a datetime") parsed, reso = _parse_delimited_date(date_string, dayfirst) if parsed is not None: @@ -478,7 +478,7 @@ cpdef bint _does_string_look_like_datetime(str py_string): buf = get_c_string_buf_and_size(py_string, &length) if length >= 1: first = buf[0] - if first == b'0': + if first == b"0": # Strings starting with 0 are more consistent with a # date-like string than a number return True @@ -492,7 +492,7 @@ cpdef bint _does_string_look_like_datetime(str py_string): # a float number can be used, b'\0' - not to use a thousand # separator, 1 - skip extra spaces before and after, converted_date = xstrtod(buf, &endptr, - b'.', b'e', b'\0', 1, &error, NULL) + b".", b"e", b"\0", 1, &error, NULL) # if there were no errors and the whole line was parsed, then ... if error == 0 and endptr == buf + length: return converted_date >= 1000 @@ -512,7 +512,7 @@ cdef inline object _parse_dateabbr_string(object date_string, datetime default, assert isinstance(date_string, str) if date_string in nat_strings: - return NaT, '' + return NaT, "" date_string = date_string.upper() date_len = len(date_string) @@ -521,21 +521,21 @@ cdef inline object _parse_dateabbr_string(object date_string, datetime default, # parse year only like 2000 try: ret = default.replace(year=int(date_string)) - return ret, 'year' + return ret, "year" except ValueError: pass try: if 4 <= date_len <= 7: - i = date_string.index('Q', 1, 6) + i = date_string.index("Q", 1, 6) if i == 1: quarter = int(date_string[0]) if date_len == 4 or (date_len == 5 - and date_string[i + 1] == '-'): + and date_string[i + 1] == "-"): # r'(\d)Q-?(\d\d)') year = 2000 + int(date_string[-2:]) elif date_len == 6 or (date_len == 7 - and date_string[i + 1] == '-'): + and date_string[i + 1] == "-"): # r'(\d)Q-?(\d\d\d\d)') year = int(date_string[-4:]) else: @@ -543,14 +543,14 @@ cdef inline object _parse_dateabbr_string(object date_string, datetime default, elif i == 2 or i == 3: # r'(\d\d)-?Q(\d)' if date_len == 4 or (date_len == 5 - and date_string[i - 1] == '-'): + and date_string[i - 1] == "-"): quarter = int(date_string[-1]) year = 2000 + int(date_string[:2]) else: raise ValueError elif i == 4 or i == 5: if date_len == 6 or (date_len == 7 - and date_string[i - 1] == '-'): + and date_string[i - 1] == "-"): # r'(\d\d\d\d)-?Q(\d)' quarter = int(date_string[-1]) year = int(date_string[:4]) @@ -558,9 +558,9 @@ cdef inline object _parse_dateabbr_string(object date_string, datetime default, raise ValueError if not (1 <= quarter <= 4): - raise DateParseError(f'Incorrect quarterly string is given, ' - f'quarter must be ' - f'between 1 and 4: {date_string}') + raise DateParseError(f"Incorrect quarterly string is given, " + f"quarter must be " + f"between 1 and 4: {date_string}") try: # GH#1228 @@ -571,30 +571,30 @@ cdef inline object _parse_dateabbr_string(object date_string, datetime default, f"freq: {freq}") ret = default.replace(year=year, month=month) - return ret, 'quarter' + return ret, "quarter" except DateParseError: raise except ValueError: pass - if date_len == 6 and freq == 'M': + if date_len == 6 and freq == "M": year = int(date_string[:4]) month = int(date_string[4:6]) try: ret = default.replace(year=year, month=month) - return ret, 'month' + return ret, "month" except ValueError: pass - for pat 
in ['%Y-%m', '%b %Y', '%b-%Y']: + for pat in ["%Y-%m", "%b %Y", "%b-%Y"]: try: ret = datetime.strptime(date_string, pat) - return ret, 'month' + return ret, "month" except ValueError: pass - raise ValueError(f'Unable to parse {date_string}') + raise ValueError(f"Unable to parse {date_string}") cpdef quarter_to_myear(int year, int quarter, str freq): @@ -664,11 +664,11 @@ cdef dateutil_parse( if reso is None: raise ValueError(f"Unable to parse datetime string: {timestr}") - if reso == 'microsecond': - if repl['microsecond'] == 0: - reso = 'second' - elif repl['microsecond'] % 1000 == 0: - reso = 'millisecond' + if reso == "microsecond": + if repl["microsecond"] == 0: + reso = "second" + elif repl["microsecond"] % 1000 == 0: + reso = "millisecond" ret = default.replace(**repl) if res.weekday is not None and not res.day: @@ -712,7 +712,7 @@ def try_parse_dates( object[::1] result n = len(values) - result = np.empty(n, dtype='O') + result = np.empty(n, dtype="O") if parser is None: if default is None: # GH2618 @@ -725,7 +725,7 @@ def try_parse_dates( # EAFP here try: for i in range(n): - if values[i] == '': + if values[i] == "": result[i] = np.nan else: result[i] = parse_date(values[i]) @@ -736,7 +736,7 @@ def try_parse_dates( parse_date = parser for i in range(n): - if values[i] == '': + if values[i] == "": result[i] = np.nan else: result[i] = parse_date(values[i]) @@ -754,8 +754,8 @@ def try_parse_year_month_day( n = len(years) # TODO(cython3): Use len instead of `shape[0]` if months.shape[0] != n or days.shape[0] != n: - raise ValueError('Length of years/months/days must all be equal') - result = np.empty(n, dtype='O') + raise ValueError("Length of years/months/days must all be equal") + result = np.empty(n, dtype="O") for i in range(n): result[i] = datetime(int(years[i]), int(months[i]), int(days[i])) @@ -786,8 +786,8 @@ def try_parse_datetime_components(object[:] years, or minutes.shape[0] != n or seconds.shape[0] != n ): - raise ValueError('Length of all datetime components must be equal') - result = np.empty(n, dtype='O') + raise ValueError("Length of all datetime components must be equal") + result = np.empty(n, dtype="O") for i in range(n): float_secs = float(seconds[i]) @@ -818,15 +818,15 @@ def try_parse_datetime_components(object[:] years, # Copyright (c) 2017 - dateutil contributors class _timelex: def __init__(self, instream): - if getattr(instream, 'decode', None) is not None: + if getattr(instream, "decode", None) is not None: instream = instream.decode() if isinstance(instream, str): self.stream = instream - elif getattr(instream, 'read', None) is None: + elif getattr(instream, "read", None) is None: raise TypeError( - 'Parser must be a string or character stream, not ' - f'{type(instream).__name__}') + "Parser must be a string or character stream, not " + f"{type(instream).__name__}") else: self.stream = instream.read() @@ -846,7 +846,7 @@ class _timelex: cdef: Py_ssize_t n - stream = self.stream.replace('\x00', '') + stream = self.stream.replace("\x00", "") # TODO: Change \s --> \s+ (this doesn't match existing behavior) # TODO: change the punctuation block to punc+ (does not match existing) @@ -865,10 +865,10 @@ class _timelex: # Kludge to match ,-decimal behavior; it'd be better to do this # later in the process and have a simpler tokenization if (token is not None and token.isdigit() and - tokens[n + 1] == ',' and tokens[n + 2].isdigit()): + tokens[n + 1] == "," and tokens[n + 2].isdigit()): # Have to check None b/c it might be replaced during the loop # TODO: I _really_ 
don't faking the value here - tokens[n] = token + '.' + tokens[n + 2] + tokens[n] = token + "." + tokens[n + 2] tokens[n + 1] = None tokens[n + 2] = None @@ -889,12 +889,12 @@ def format_is_iso(f: str) -> bint: Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different but must be consistent. Leading 0s in dates and times are optional. """ - iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}'.format - excluded_formats = ['%Y%m%d', '%Y%m', '%Y'] + iso_template = "%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}".format + excluded_formats = ["%Y%m%d", "%Y%m", "%Y"] - for date_sep in [' ', '/', '\\', '-', '.', '']: - for time_sep in [' ', 'T']: - for micro_or_tz in ['', '%z', '.%f', '.%f%z']: + for date_sep in [" ", "/", "\\", "-", ".", ""]: + for time_sep in [" ", "T"]: + for micro_or_tz in ["", "%z", ".%f", ".%f%z"]: if (iso_template(date_sep=date_sep, time_sep=time_sep, micro_or_tz=micro_or_tz, @@ -922,25 +922,25 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: datetime format string (for `strftime` or `strptime`), or None if it can't be guessed. """ - day_attribute_and_format = (('day',), '%d', 2) + day_attribute_and_format = (("day",), "%d", 2) # attr name, format, padding (if any) datetime_attrs_to_format = [ - (('year', 'month', 'day'), '%Y%m%d', 0), - (('year',), '%Y', 0), - (('month',), '%B', 0), - (('month',), '%b', 0), - (('month',), '%m', 2), + (("year", "month", "day"), "%Y%m%d", 0), + (("year",), "%Y", 0), + (("month",), "%B", 0), + (("month",), "%b", 0), + (("month",), "%m", 2), day_attribute_and_format, - (('hour',), '%H', 2), - (('minute',), '%M', 2), - (('second',), '%S', 2), - (('second', 'microsecond'), '%S.%f', 0), - (('tzinfo',), '%z', 0), - (('tzinfo',), '%Z', 0), - (('day_of_week',), '%a', 0), - (('day_of_week',), '%A', 0), - (('meridiem',), '%p', 0), + (("hour",), "%H", 2), + (("minute",), "%M", 2), + (("second",), "%S", 2), + (("second", "microsecond"), "%S.%f", 0), + (("tzinfo",), "%z", 0), + (("tzinfo",), "%Z", 0), + (("day_of_week",), "%a", 0), + (("day_of_week",), "%A", 0), + (("meridiem",), "%p", 0), ] if dayfirst: @@ -967,13 +967,13 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: # instead of ‘+09:00’. if parsed_datetime.tzinfo is not None: offset_index = None - if len(tokens) > 0 and tokens[-1] == 'Z': + if len(tokens) > 0 and tokens[-1] == "Z": # the last 'Z' means zero offset offset_index = -1 - elif len(tokens) > 1 and tokens[-2] in ('+', '-'): + elif len(tokens) > 1 and tokens[-2] in ("+", "-"): # ex. [..., '+', '0900'] offset_index = -2 - elif len(tokens) > 3 and tokens[-4] in ('+', '-'): + elif len(tokens) > 3 and tokens[-4] in ("+", "-"): # ex. [..., '+', '09', ':', '00'] offset_index = -4 @@ -1017,10 +1017,10 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: # We make exceptions for %Y and %Y-%m (only with the `-` separator) # as they conform with ISO8601. 
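# Aside: a minimal usage sketch of the completeness rule described in the
# comment above and implemented just below, assuming the private import
# path shown in this diff (pandas._libs.tslibs.parsing; it may move
# between pandas versions). Only complete dates, or the ISO8601
# "%Y" / "%Y-%m" exceptions, yield a guess:
from pandas._libs.tslibs.parsing import guess_datetime_format

assert guess_datetime_format("2011-12-30") == "%Y-%m-%d"  # full date
assert guess_datetime_format("2011-12") == "%Y-%m"        # ISO8601 exception
assert guess_datetime_format("2011") == "%Y"              # ISO8601 exception
assert guess_datetime_format("12/2011") is None           # partial, not ISO8601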
if ( - len({'year', 'month', 'day'} & found_attrs) != 3 - and format_guess != ['%Y'] + len({"year", "month", "day"} & found_attrs) != 3 + and format_guess != ["%Y"] and not ( - format_guess == ['%Y', None, '%m'] and tokens[1] == '-' + format_guess == ["%Y", None, "%m"] and tokens[1] == "-" ) ): return None @@ -1042,7 +1042,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: output_format.append(tokens[i]) - guessed_format = ''.join(output_format) + guessed_format = "".join(output_format) try: array_strptime(np.asarray([dt_str], dtype=object), guessed_format) @@ -1050,7 +1050,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: # Doesn't parse, so this can't be the correct format. return None # rebuild string, capturing any inferred padding - dt_str = ''.join(tokens) + dt_str = "".join(tokens) if parsed_datetime.strftime(guessed_format) == dt_str: return guessed_format else: @@ -1059,16 +1059,16 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: cdef str _fill_token(token: str, padding: int): cdef str token_filled - if '.' not in token: + if "." not in token: token_filled = token.zfill(padding) else: - seconds, nanoseconds = token.split('.') - seconds = f'{int(seconds):02d}' + seconds, nanoseconds = token.split(".") + seconds = f"{int(seconds):02d}" # right-pad so we get nanoseconds, then only take # first 6 digits (microseconds) as stdlib datetime # doesn't support nanoseconds - nanoseconds = nanoseconds.ljust(9, '0')[:6] - token_filled = f'{seconds}.{nanoseconds}' + nanoseconds = nanoseconds.ljust(9, "0")[:6] + token_filled = f"{seconds}.{nanoseconds}" return token_filled diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 0e7cfa4dd9670..cc9c2d631bcd9 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1163,29 +1163,29 @@ cdef str period_format(int64_t value, int freq, object fmt=None): if fmt is None: freq_group = get_freq_group(freq) if freq_group == FR_ANN: - fmt = b'%Y' + fmt = b"%Y" elif freq_group == FR_QTR: - fmt = b'%FQ%q' + fmt = b"%FQ%q" elif freq_group == FR_MTH: - fmt = b'%Y-%m' + fmt = b"%Y-%m" elif freq_group == FR_WK: left = period_asfreq(value, freq, FR_DAY, 0) right = period_asfreq(value, freq, FR_DAY, 1) return f"{period_format(left, FR_DAY)}/{period_format(right, FR_DAY)}" elif freq_group == FR_BUS or freq_group == FR_DAY: - fmt = b'%Y-%m-%d' + fmt = b"%Y-%m-%d" elif freq_group == FR_HR: - fmt = b'%Y-%m-%d %H:00' + fmt = b"%Y-%m-%d %H:00" elif freq_group == FR_MIN: - fmt = b'%Y-%m-%d %H:%M' + fmt = b"%Y-%m-%d %H:%M" elif freq_group == FR_SEC: - fmt = b'%Y-%m-%d %H:%M:%S' + fmt = b"%Y-%m-%d %H:%M:%S" elif freq_group == FR_MS: - fmt = b'%Y-%m-%d %H:%M:%S.%l' + fmt = b"%Y-%m-%d %H:%M:%S.%l" elif freq_group == FR_US: - fmt = b'%Y-%m-%d %H:%M:%S.%u' + fmt = b"%Y-%m-%d %H:%M:%S.%u" elif freq_group == FR_NS: - fmt = b'%Y-%m-%d %H:%M:%S.%n' + fmt = b"%Y-%m-%d %H:%M:%S.%n" else: raise ValueError(f"Unknown freq: {freq}") @@ -1513,7 +1513,7 @@ def extract_freq(ndarray[object] values) -> BaseOffset: if is_period_object(value): return value.freq - raise ValueError('freq not specified and cannot be inferred') + raise ValueError("freq not specified and cannot be inferred") # ----------------------------------------------------------------------- # period helpers @@ -1774,7 +1774,7 @@ cdef class _Period(PeriodMixin): return NaT return NotImplemented - def asfreq(self, freq, how='E') -> "Period": + def asfreq(self, freq, how="E") -> "Period": 
""" Convert Period to desired frequency, at the start or end of the interval. @@ -1795,7 +1795,7 @@ cdef class _Period(PeriodMixin): base2 = freq_to_dtype_code(freq) # self.n can't be negative or 0 - end = how == 'E' + end = how == "E" if end: ordinal = self.ordinal + self.freq.n - 1 else: @@ -1826,13 +1826,13 @@ cdef class _Period(PeriodMixin): """ how = validate_end_alias(how) - end = how == 'E' + end = how == "E" if end: if freq == "B" or self.freq == "B": # roll forward to ensure we land on B date adjust = np.timedelta64(1, "D") - np.timedelta64(1, "ns") return self.to_timestamp(how="start") + adjust - endpoint = (self + self.freq).to_timestamp(how='start') + endpoint = (self + self.freq).to_timestamp(how="start") return endpoint - np.timedelta64(1, "ns") if freq is None: @@ -2530,7 +2530,7 @@ class Period(_Period): if not util.is_integer_object(ordinal): raise ValueError("Ordinal must be an integer") if freq is None: - raise ValueError('Must supply freq for ordinal value') + raise ValueError("Must supply freq for ordinal value") elif value is None: if (year is None and month is None and @@ -2581,7 +2581,7 @@ class Period(_Period): else: nanosecond = ts.nanosecond if nanosecond != 0: - reso = 'nanosecond' + reso = "nanosecond" if dt is NaT: ordinal = NPY_NAT @@ -2596,18 +2596,18 @@ class Period(_Period): elif PyDateTime_Check(value): dt = value if freq is None: - raise ValueError('Must supply freq for datetime value') + raise ValueError("Must supply freq for datetime value") if isinstance(dt, Timestamp): nanosecond = dt.nanosecond elif util.is_datetime64_object(value): dt = Timestamp(value) if freq is None: - raise ValueError('Must supply freq for datetime value') + raise ValueError("Must supply freq for datetime value") nanosecond = dt.nanosecond elif PyDate_Check(value): dt = datetime(year=value.year, month=value.month, day=value.day) if freq is None: - raise ValueError('Must supply freq for datetime value') + raise ValueError("Must supply freq for datetime value") else: msg = "Value must be Period, string, integer, or datetime" raise ValueError(msg) @@ -2644,10 +2644,10 @@ cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, def validate_end_alias(how: str) -> str: # Literal["E", "S"] - how_dict = {'S': 'S', 'E': 'E', - 'START': 'S', 'FINISH': 'E', - 'BEGIN': 'S', 'END': 'E'} + how_dict = {"S": "S", "E": "E", + "START": "S", "FINISH": "E", + "BEGIN": "S", "END": "E"} how = how_dict.get(str(how).upper()) - if how not in {'S', 'E'}: - raise ValueError('How must be one of S or E') + if how not in {"S", "E"}: + raise ValueError("How must be one of S or E") return how diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 79944bc86a8cf..c56b4891da428 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -35,36 +35,36 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.timestamps cimport _Timestamp -cdef dict _parse_code_table = {'y': 0, - 'Y': 1, - 'm': 2, - 'B': 3, - 'b': 4, - 'd': 5, - 'H': 6, - 'I': 7, - 'M': 8, - 'S': 9, - 'f': 10, - 'A': 11, - 'a': 12, - 'w': 13, - 'j': 14, - 'U': 15, - 'W': 16, - 'Z': 17, - 'p': 18, # an additional key, only with I - 'z': 19, - 'G': 20, - 'V': 21, - 'u': 22} +cdef dict _parse_code_table = {"y": 0, + "Y": 1, + "m": 2, + "B": 3, + "b": 4, + "d": 5, + "H": 6, + "I": 7, + "M": 8, + "S": 9, + "f": 10, + "A": 11, + "a": 12, + "w": 13, + "j": 14, + "U": 15, + "W": 16, + "Z": 17, + "p": 18, # an additional key, only with I + "z": 19, + "G": 20, + 
"V": 21, + "u": 22} def array_strptime( ndarray[object] values, str fmt, bint exact=True, - errors='raise', + errors="raise", bint utc=False, ): """ @@ -88,9 +88,9 @@ def array_strptime( int iso_week, iso_year int64_t us, ns object val, group_key, ampm, found, timezone - bint is_raise = errors=='raise' - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' + bint is_raise = errors=="raise" + bint is_ignore = errors=="ignore" + bint is_coerce = errors=="coerce" bint found_naive = False bint found_tz = False tzinfo tz_out = None @@ -98,12 +98,12 @@ def array_strptime( assert is_raise or is_ignore or is_coerce if fmt is not None: - if '%W' in fmt or '%U' in fmt: - if '%Y' not in fmt and '%y' not in fmt: + if "%W" in fmt or "%U" in fmt: + if "%Y" not in fmt and "%y" not in fmt: raise ValueError("Cannot use '%W' or '%U' without day and year") - if '%A' not in fmt and '%a' not in fmt and '%w' not in fmt: + if "%A" not in fmt and "%a" not in fmt and "%w" not in fmt: raise ValueError("Cannot use '%W' or '%U' without day and year") - elif '%Z' in fmt and '%z' in fmt: + elif "%Z" in fmt and "%z" in fmt: raise ValueError("Cannot parse both %Z and %z") global _TimeRE_cache, _regex_cache @@ -132,9 +132,9 @@ def array_strptime( raise ValueError(f"stray % in format '{fmt}'") _regex_cache[fmt] = format_regex - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - result_timezone = np.empty(n, dtype='object') + result = np.empty(n, dtype="M8[ns]") + iresult = result.view("i8") + result_timezone = np.empty(n, dtype="object") dts.us = dts.ps = dts.as = 0 @@ -216,7 +216,7 @@ def array_strptime( parse_code = _parse_code_table[group_key] if parse_code == 0: - year = int(found_dict['y']) + year = int(found_dict["y"]) # Open Group specification for strptime() states that a %y # value in the range of [00, 68] is in the century 2000, while # [69,99] is in the century 1900 @@ -225,26 +225,26 @@ def array_strptime( else: year += 1900 elif parse_code == 1: - year = int(found_dict['Y']) + year = int(found_dict["Y"]) elif parse_code == 2: - month = int(found_dict['m']) + month = int(found_dict["m"]) # elif group_key == 'B': elif parse_code == 3: - month = locale_time.f_month.index(found_dict['B'].lower()) + month = locale_time.f_month.index(found_dict["B"].lower()) # elif group_key == 'b': elif parse_code == 4: - month = locale_time.a_month.index(found_dict['b'].lower()) + month = locale_time.a_month.index(found_dict["b"].lower()) # elif group_key == 'd': elif parse_code == 5: - day = int(found_dict['d']) + day = int(found_dict["d"]) # elif group_key == 'H': elif parse_code == 6: - hour = int(found_dict['H']) + hour = int(found_dict["H"]) elif parse_code == 7: - hour = int(found_dict['I']) - ampm = found_dict.get('p', '').lower() + hour = int(found_dict["I"]) + ampm = found_dict.get("p", "").lower() # If there was no AM/PM indicator, we'll treat this like AM - if ampm in ('', locale_time.am_pm[0]): + if ampm in ("", locale_time.am_pm[0]): # We're in AM so the hour is correct unless we're # looking at 12 midnight. 
# 12 midnight == 12 AM == hour 0 @@ -257,46 +257,46 @@ def array_strptime( if hour != 12: hour += 12 elif parse_code == 8: - minute = int(found_dict['M']) + minute = int(found_dict["M"]) elif parse_code == 9: - second = int(found_dict['S']) + second = int(found_dict["S"]) elif parse_code == 10: - s = found_dict['f'] + s = found_dict["f"] # Pad to always return nanoseconds s += "0" * (9 - len(s)) us = long(s) ns = us % 1000 us = us // 1000 elif parse_code == 11: - weekday = locale_time.f_weekday.index(found_dict['A'].lower()) + weekday = locale_time.f_weekday.index(found_dict["A"].lower()) elif parse_code == 12: - weekday = locale_time.a_weekday.index(found_dict['a'].lower()) + weekday = locale_time.a_weekday.index(found_dict["a"].lower()) elif parse_code == 13: - weekday = int(found_dict['w']) + weekday = int(found_dict["w"]) if weekday == 0: weekday = 6 else: weekday -= 1 elif parse_code == 14: - julian = int(found_dict['j']) + julian = int(found_dict["j"]) elif parse_code == 15 or parse_code == 16: week_of_year = int(found_dict[group_key]) - if group_key == 'U': + if group_key == "U": # U starts week on Sunday. week_of_year_start = 6 else: # W starts week on Monday. week_of_year_start = 0 elif parse_code == 17: - timezone = pytz.timezone(found_dict['Z']) + timezone = pytz.timezone(found_dict["Z"]) elif parse_code == 19: - timezone = parse_timezone_directive(found_dict['z']) + timezone = parse_timezone_directive(found_dict["z"]) elif parse_code == 20: - iso_year = int(found_dict['G']) + iso_year = int(found_dict["G"]) elif parse_code == 21: - iso_week = int(found_dict['V']) + iso_week = int(found_dict["V"]) elif parse_code == 22: - weekday = int(found_dict['u']) + weekday = int(found_dict["u"]) weekday -= 1 # don't assume default values for ISO week/year @@ -424,7 +424,7 @@ class TimeRE(_TimeRE): if key == "Z": # lazy computation if self._Z is None: - self._Z = self.__seqToRE(pytz.all_timezones, 'Z') + self._Z = self.__seqToRE(pytz.all_timezones, "Z") # Note: handling Z is the key difference vs using the stdlib # _strptime.TimeRE. test_to_datetime_parse_tzname_or_tzoffset with # fmt='%Y-%m-%d %H:%M:%S %Z' fails with the stdlib version. @@ -543,12 +543,12 @@ cdef tzinfo parse_timezone_directive(str z): int total_minutes object gmtoff_remainder, gmtoff_remainder_padding - if z == 'Z': + if z == "Z": return pytz.FixedOffset(0) - if z[3] == ':': + if z[3] == ":": z = z[:3] + z[4:] if len(z) > 5: - if z[5] != ':': + if z[5] != ":": raise ValueError(f"Inconsistent use of : in {z}") z = z[:5] + z[6:] hours = int(z[1:3]) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 5cc97a722b7a6..fc276d5d024cd 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -410,7 +410,7 @@ def array_to_timedelta64( # raise here otherwise we segfault below raise TypeError("array_to_timedelta64 'values' must have object dtype") - if errors not in {'ignore', 'raise', 'coerce'}: + if errors not in {"ignore", "raise", "coerce"}: raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}") if unit is not None and errors != "coerce": @@ -442,7 +442,7 @@ def array_to_timedelta64( except (TypeError, ValueError): cnp.PyArray_MultiIter_RESET(mi) - parsed_unit = parse_timedelta_unit(unit or 'ns') + parsed_unit = parse_timedelta_unit(unit or "ns") for i in range(n): item = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] @@ -513,15 +513,15 @@ cdef inline int64_t parse_timedelta_string(str ts) except? 
-1: for c in ts: # skip whitespace / commas - if c == ' ' or c == ',': + if c == " " or c == ",": pass # positive signs are ignored - elif c == '+': + elif c == "+": pass # neg - elif c == '-': + elif c == "-": if neg or have_value or have_hhmmss: raise ValueError("only leading negative signs are allowed") @@ -550,7 +550,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: result += timedelta_as_neg(r, neg) # hh:mm:ss. - elif c == ':': + elif c == ":": # we flip this off if we have a leading value if have_value: @@ -559,15 +559,15 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: # we are in the pattern hh:mm:ss pattern if len(number): if current_unit is None: - current_unit = 'h' + current_unit = "h" m = 1000000000 * 3600 - elif current_unit == 'h': - current_unit = 'm' + elif current_unit == "h": + current_unit = "m" m = 1000000000 * 60 - elif current_unit == 'm': - current_unit = 's' + elif current_unit == "m": + current_unit = "s" m = 1000000000 - r = int(''.join(number)) * m + r = int("".join(number)) * m result += timedelta_as_neg(r, neg) have_hhmmss = 1 else: @@ -576,17 +576,17 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: unit, number = [], [] # after the decimal point - elif c == '.': + elif c == ".": if len(number) and current_unit is not None: # by definition we had something like # so we need to evaluate the final field from a # hh:mm:ss (so current_unit is 'm') - if current_unit != 'm': + if current_unit != "m": raise ValueError("expected hh:mm:ss format before .") m = 1000000000 - r = int(''.join(number)) * m + r = int("".join(number)) * m result += timedelta_as_neg(r, neg) have_value = 1 unit, number, frac = [], [], [] @@ -622,16 +622,16 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: else: m = 1 frac = frac[:9] - r = int(''.join(frac)) * m + r = int("".join(frac)) * m result += timedelta_as_neg(r, neg) # we have a regular format # we must have seconds at this point (hence the unit is still 'm') elif current_unit is not None: - if current_unit != 'm': + if current_unit != "m": raise ValueError("expected hh:mm:ss format") m = 1000000000 - r = int(''.join(number)) * m + r = int("".join(number)) * m result += timedelta_as_neg(r, neg) # we have a last abbreviation @@ -652,7 +652,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1: if have_value: raise ValueError("have leftover units") if len(number): - r = timedelta_from_spec(number, frac, 'ns') + r = timedelta_from_spec(number, frac, "ns") result += timedelta_as_neg(r, neg) return result @@ -683,20 +683,20 @@ cdef inline timedelta_from_spec(object number, object frac, object unit): cdef: str n - unit = ''.join(unit) + unit = "".join(unit) if unit in ["M", "Y", "y"]: raise ValueError( "Units 'M', 'Y' and 'y' do not represent unambiguous timedelta " "values and are not supported." ) - if unit == 'M': + if unit == "M": # To parse ISO 8601 string, 'M' should be treated as minute, # not month - unit = 'm' + unit = "m" unit = parse_timedelta_unit(unit) - n = ''.join(number) + '.' + ''.join(frac) + n = "".join(number) + "." 
+ "".join(frac) return cast_from_unit(float(n), unit) @@ -770,9 +770,9 @@ def _binary_op_method_timedeltalike(op, name): item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) return f(self, item) - elif other.dtype.kind in ['m', 'M']: + elif other.dtype.kind in ["m", "M"]: return op(self.to_timedelta64(), other) - elif other.dtype.kind == 'O': + elif other.dtype.kind == "O": return np.array([op(self, x) for x in other]) else: return NotImplemented @@ -838,7 +838,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: unicode c int64_t result = 0, r int p = 0, sign = 1 - object dec_unit = 'ms', err_msg + object dec_unit = "ms", err_msg bint have_dot = 0, have_value = 0, neg = 0 list number = [], unit = [] @@ -854,65 +854,65 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: have_value = 1 if have_dot: - if p == 3 and dec_unit != 'ns': + if p == 3 and dec_unit != "ns": unit.append(dec_unit) - if dec_unit == 'ms': - dec_unit = 'us' - elif dec_unit == 'us': - dec_unit = 'ns' + if dec_unit == "ms": + dec_unit = "us" + elif dec_unit == "us": + dec_unit = "ns" p = 0 p += 1 if not len(unit): number.append(c) else: - r = timedelta_from_spec(number, '0', unit) + r = timedelta_from_spec(number, "0", unit) result += timedelta_as_neg(r, neg) neg = 0 unit, number = [], [c] else: - if c == 'P' or c == 'T': + if c == "P" or c == "T": pass # ignore marking characters P and T - elif c == '-': + elif c == "-": if neg or have_value: raise ValueError(err_msg) else: neg = 1 elif c == "+": pass - elif c in ['W', 'D', 'H', 'M']: - if c in ['H', 'M'] and len(number) > 2: + elif c in ["W", "D", "H", "M"]: + if c in ["H", "M"] and len(number) > 2: raise ValueError(err_msg) - if c == 'M': - c = 'min' + if c == "M": + c = "min" unit.append(c) - r = timedelta_from_spec(number, '0', unit) + r = timedelta_from_spec(number, "0", unit) result += timedelta_as_neg(r, neg) neg = 0 unit, number = [], [] - elif c == '.': + elif c == ".": # append any seconds if len(number): - r = timedelta_from_spec(number, '0', 'S') + r = timedelta_from_spec(number, "0", "S") result += timedelta_as_neg(r, neg) unit, number = [], [] have_dot = 1 - elif c == 'S': + elif c == "S": if have_dot: # ms, us, or ns if not len(number) or p > 3: raise ValueError(err_msg) # pad to 3 digits as required pad = 3 - p while pad > 0: - number.append('0') + number.append("0") pad -= 1 - r = timedelta_from_spec(number, '0', dec_unit) + r = timedelta_from_spec(number, "0", dec_unit) result += timedelta_as_neg(r, neg) else: # seconds - r = timedelta_from_spec(number, '0', 'S') + r = timedelta_from_spec(number, "0", "S") result += timedelta_as_neg(r, neg) else: raise ValueError(err_msg) @@ -1435,7 +1435,7 @@ cdef class _Timedelta(timedelta): else: sign = " " - if format == 'all': + if format == "all": fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}." 
"{milliseconds:03}{microseconds:03}{nanoseconds:03}") else: @@ -1451,24 +1451,24 @@ cdef class _Timedelta(timedelta): else: seconds_fmt = "{seconds:02}" - if format == 'sub_day' and not self._d: + if format == "sub_day" and not self._d: fmt = "{hours:02}:{minutes:02}:" + seconds_fmt - elif subs or format == 'long': + elif subs or format == "long": fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt else: fmt = "{days} days" comp_dict = self.components._asdict() - comp_dict['sign'] = sign + comp_dict["sign"] = sign return fmt.format(**comp_dict) def __repr__(self) -> str: - repr_based = self._repr_base(format='long') + repr_based = self._repr_base(format="long") return f"Timedelta('{repr_based}')" def __str__(self) -> str: - return self._repr_base(format='long') + return self._repr_base(format="long") def __bool__(self) -> bool: return self.value != 0 @@ -1512,14 +1512,14 @@ cdef class _Timedelta(timedelta): 'P500DT12H0M0S' """ components = self.components - seconds = (f'{components.seconds}.' - f'{components.milliseconds:0>3}' - f'{components.microseconds:0>3}' - f'{components.nanoseconds:0>3}') + seconds = (f"{components.seconds}." + f"{components.milliseconds:0>3}" + f"{components.microseconds:0>3}" + f"{components.nanoseconds:0>3}") # Trim unnecessary 0s, 1.000000000 -> 1 - seconds = seconds.rstrip('0').rstrip('.') - tpl = (f'P{components.days}DT{components.hours}' - f'H{components.minutes}M{seconds}S') + seconds = seconds.rstrip("0").rstrip(".") + tpl = (f"P{components.days}DT{components.hours}" + f"H{components.minutes}M{seconds}S") return tpl # ---------------------------------------------------------------- @@ -1665,22 +1665,22 @@ class Timedelta(_Timedelta): # are taken into consideration. seconds = int(( ( - (kwargs.get('days', 0) + kwargs.get('weeks', 0) * 7) * 24 - + kwargs.get('hours', 0) + (kwargs.get("days", 0) + kwargs.get("weeks", 0) * 7) * 24 + + kwargs.get("hours", 0) ) * 3600 - + kwargs.get('minutes', 0) * 60 - + kwargs.get('seconds', 0) + + kwargs.get("minutes", 0) * 60 + + kwargs.get("seconds", 0) ) * 1_000_000_000 ) value = np.timedelta64( - int(kwargs.get('nanoseconds', 0)) - + int(kwargs.get('microseconds', 0) * 1_000) - + int(kwargs.get('milliseconds', 0) * 1_000_000) + int(kwargs.get("nanoseconds", 0)) + + int(kwargs.get("microseconds", 0) * 1_000) + + int(kwargs.get("milliseconds", 0) * 1_000_000) + seconds ) - if unit in {'Y', 'y', 'M'}: + if unit in {"Y", "y", "M"}: raise ValueError( "Units 'M', 'Y', and 'y' are no longer supported, as they do not " "represent unambiguous timedelta values durations." @@ -1702,8 +1702,8 @@ class Timedelta(_Timedelta): elif isinstance(value, str): if unit is not None: raise ValueError("unit must not be specified if the value is a str") - if (len(value) > 0 and value[0] == 'P') or ( - len(value) > 1 and value[:2] == '-P' + if (len(value) > 0 and value[0] == "P") or ( + len(value) > 1 and value[:2] == "-P" ): value = parse_iso_format_string(value) else: @@ -1757,7 +1757,7 @@ class Timedelta(_Timedelta): ) if is_timedelta64_object(value): - value = value.view('i8') + value = value.view("i8") # nat if value == NPY_NAT: @@ -1839,14 +1839,14 @@ class Timedelta(_Timedelta): # Arithmetic Methods # TODO: Can some of these be defined in the cython class? 
- __neg__ = _op_unary_method(lambda x: -x, '__neg__') - __pos__ = _op_unary_method(lambda x: x, '__pos__') - __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + __neg__ = _op_unary_method(lambda x: -x, "__neg__") + __pos__ = _op_unary_method(lambda x: x, "__pos__") + __abs__ = _op_unary_method(lambda x: abs(x), "__abs__") - __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') - __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') - __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') - __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') + __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, "__add__") + __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, "__radd__") + __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, "__sub__") + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, "__rsub__") def __mul__(self, other): if is_integer_object(other) or is_float_object(other): @@ -1947,7 +1947,7 @@ class Timedelta(_Timedelta): item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) return self.__floordiv__(item) - if other.dtype.kind == 'm': + if other.dtype.kind == "m": # also timedelta-like # TODO: could suppress # RuntimeWarning: invalid value encountered in floor_divide @@ -1959,13 +1959,13 @@ class Timedelta(_Timedelta): result[mask] = np.nan return result - elif other.dtype.kind in ['i', 'u', 'f']: + elif other.dtype.kind in ["i", "u", "f"]: if other.ndim == 0: return self // other.item() else: return self.to_timedelta64() // other - raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') + raise TypeError(f"Invalid dtype {other.dtype} for __floordiv__") return NotImplemented @@ -1987,7 +1987,7 @@ class Timedelta(_Timedelta): item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other) return self.__rfloordiv__(item) - if other.dtype.kind == 'm': + if other.dtype.kind == "m": # also timedelta-like # TODO: could suppress # RuntimeWarning: invalid value encountered in floor_divide @@ -2000,7 +2000,7 @@ class Timedelta(_Timedelta): return result # Includes integer array // Timedelta, disallowed in GH#19761 - raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') + raise TypeError(f"Invalid dtype {other.dtype} for __floordiv__") return NotImplemented diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8e9c8d40398d9..f987a2feb2717 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -434,7 +434,7 @@ cdef class _Timestamp(ABCTimestamp): raise integer_op_not_supported(self) elif is_array(other): - if other.dtype.kind in ['i', 'u']: + if other.dtype.kind in ["i", "u"]: raise integer_op_not_supported(self) if other.dtype.kind == "m": if self.tz is None: @@ -465,7 +465,7 @@ cdef class _Timestamp(ABCTimestamp): return self + neg_other elif is_array(other): - if other.dtype.kind in ['i', 'u']: + if other.dtype.kind in ["i", "u"]: raise integer_op_not_supported(self) if other.dtype.kind == "m": if self.tz is None: @@ -563,7 +563,7 @@ cdef class _Timestamp(ABCTimestamp): if freq: kwds = freq.kwds - month_kw = kwds.get('startingMonth', kwds.get('month', 12)) + month_kw = kwds.get("startingMonth", kwds.get("month", 12)) freqstr = freq.freqstr else: month_kw = 12 @@ -929,15 +929,15 @@ cdef class _Timestamp(ABCTimestamp): zone = None try: - stamp += self.strftime('%z') + stamp += self.strftime("%z") except ValueError: year2000 = self.replace(year=2000) - stamp += 
year2000.strftime('%z') + stamp += year2000.strftime("%z") if self.tzinfo: zone = get_timezone(self.tzinfo) try: - stamp += zone.strftime(' %%Z') + stamp += zone.strftime(" %%Z") except AttributeError: # e.g. tzlocal has no `strftime` pass @@ -954,16 +954,16 @@ cdef class _Timestamp(ABCTimestamp): def _date_repr(self) -> str: # Ideal here would be self.strftime("%Y-%m-%d"), but # the datetime strftime() methods require year >= 1900 and is slower - return f'{self.year}-{self.month:02d}-{self.day:02d}' + return f"{self.year}-{self.month:02d}-{self.day:02d}" @property def _time_repr(self) -> str: - result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' + result = f"{self.hour:02d}:{self.minute:02d}:{self.second:02d}" if self.nanosecond != 0: - result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' + result += f".{self.nanosecond + 1000 * self.microsecond:09d}" elif self.microsecond != 0: - result += f'.{self.microsecond:06d}' + result += f".{self.microsecond:06d}" return result @@ -1451,7 +1451,7 @@ class Timestamp(_Timestamp): # GH#17690 tzinfo must be a datetime.tzinfo object, ensured # by the cython annotation. if tz is not None: - raise ValueError('Can provide at most one of tz, tzinfo') + raise ValueError("Can provide at most one of tz, tzinfo") # User passed tzinfo instead of tz; avoid silently ignoring tz, tzinfo = tzinfo, None @@ -1465,7 +1465,7 @@ class Timestamp(_Timestamp): if (ts_input is not _no_input and not ( PyDateTime_Check(ts_input) and - getattr(ts_input, 'tzinfo', None) is None)): + getattr(ts_input, "tzinfo", None) is None)): raise ValueError( "Cannot pass fold with possibly unambiguous input: int, " "float, numpy.datetime64, str, or timezone-aware " @@ -1479,7 +1479,7 @@ class Timestamp(_Timestamp): "timezones." ) - if hasattr(ts_input, 'fold'): + if hasattr(ts_input, "fold"): ts_input = ts_input.replace(fold=fold) # GH 30543 if pd.Timestamp already passed, return it @@ -1536,7 +1536,7 @@ class Timestamp(_Timestamp): # passed positionally see test_constructor_nanosecond nanosecond = microsecond - if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: + if getattr(ts_input, "tzinfo", None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " "the tz parameter. Use tz_convert instead.") @@ -1558,7 +1558,7 @@ class Timestamp(_Timestamp): return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, ts.fold, ts.creso) - def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): + def _round(self, freq, mode, ambiguous="raise", nonexistent="raise"): cdef: int64_t nanos @@ -1581,7 +1581,7 @@ class Timestamp(_Timestamp): ) return result - def round(self, freq, ambiguous='raise', nonexistent='raise'): + def round(self, freq, ambiguous="raise", nonexistent="raise"): """ Round the Timestamp to the specified resolution. @@ -1676,7 +1676,7 @@ timedelta}, default 'raise' freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent ) - def floor(self, freq, ambiguous='raise', nonexistent='raise'): + def floor(self, freq, ambiguous="raise", nonexistent="raise"): """ Return a new Timestamp floored to this resolution. @@ -1765,7 +1765,7 @@ timedelta}, default 'raise' """ return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) - def ceil(self, freq, ambiguous='raise', nonexistent='raise'): + def ceil(self, freq, ambiguous="raise", nonexistent="raise"): """ Return a new Timestamp ceiled to this resolution. 
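# Aside: a brief illustration of the rounding trio (round/floor/ceil)
# touched in these hunks; all three delegate to _round() with a different
# RoundTo mode.
import pandas as pd

ts = pd.Timestamp("2020-03-14 15:32:52")
print(ts.round("min"))  # 2020-03-14 15:33:00  (nearest)
print(ts.floor("H"))    # 2020-03-14 15:00:00  (round down)
print(ts.ceil("H"))     # 2020-03-14 16:00:00  (round up)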
@@ -1875,7 +1875,7 @@ timedelta}, default 'raise' "Use tz_localize() or tz_convert() as appropriate" ) - def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): """ Localize the Timestamp to a timezone. @@ -1946,10 +1946,10 @@ default 'raise' >>> pd.NaT.tz_localize() NaT """ - if ambiguous == 'infer': - raise ValueError('Cannot infer offset with only one time.') + if ambiguous == "infer": + raise ValueError("Cannot infer offset with only one time.") - nonexistent_options = ('raise', 'NaT', 'shift_forward', 'shift_backward') + nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") if nonexistent not in nonexistent_options and not PyDelta_Check(nonexistent): raise ValueError( "The nonexistent argument must be one of 'raise', " @@ -2122,21 +2122,21 @@ default 'raise' return v if year is not None: - dts.year = validate('year', year) + dts.year = validate("year", year) if month is not None: - dts.month = validate('month', month) + dts.month = validate("month", month) if day is not None: - dts.day = validate('day', day) + dts.day = validate("day", day) if hour is not None: - dts.hour = validate('hour', hour) + dts.hour = validate("hour", hour) if minute is not None: - dts.min = validate('minute', minute) + dts.min = validate("minute", minute) if second is not None: - dts.sec = validate('second', second) + dts.sec = validate("second", second) if microsecond is not None: - dts.us = validate('microsecond', microsecond) + dts.us = validate("microsecond", microsecond) if nanosecond is not None: - dts.ps = validate('nanosecond', nanosecond) * 1000 + dts.ps = validate("nanosecond", nanosecond) * 1000 if tzinfo is not object: tzobj = tzinfo @@ -2150,10 +2150,10 @@ default 'raise' is_dst=not bool(fold)) tzobj = ts_input.tzinfo else: - kwargs = {'year': dts.year, 'month': dts.month, 'day': dts.day, - 'hour': dts.hour, 'minute': dts.min, 'second': dts.sec, - 'microsecond': dts.us, 'tzinfo': tzobj, - 'fold': fold} + kwargs = {"year": dts.year, "month": dts.month, "day": dts.day, + "hour": dts.hour, "minute": dts.min, "second": dts.sec, + "microsecond": dts.us, "tzinfo": tzobj, + "fold": fold} ts_input = datetime(**kwargs) ts = convert_datetime_to_tsobject( diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index abf8bbc5ca5b9..8d7bebe5d46c2 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -97,12 +97,12 @@ cdef inline bint is_tzlocal(tzinfo tz): cdef inline bint treat_tz_as_pytz(tzinfo tz): - return (hasattr(tz, '_utc_transition_times') and - hasattr(tz, '_transition_info')) + return (hasattr(tz, "_utc_transition_times") and + hasattr(tz, "_transition_info")) cdef inline bint treat_tz_as_dateutil(tzinfo tz): - return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') + return hasattr(tz, "_trans_list") and hasattr(tz, "_trans_idx") # Returns str or tzinfo object @@ -125,16 +125,16 @@ cpdef inline object get_timezone(tzinfo tz): return tz else: if treat_tz_as_dateutil(tz): - if '.tar.gz' in tz._filename: + if ".tar.gz" in tz._filename: raise ValueError( - 'Bad tz filename. Dateutil on python 3 on windows has a ' - 'bug which causes tzfile._filename to be the same for all ' - 'timezone files. Please construct dateutil timezones ' + "Bad tz filename. Dateutil on python 3 on windows has a " + "bug which causes tzfile._filename to be the same for all " + "timezone files. 
Please construct dateutil timezones " 'implicitly by passing a string like "dateutil/Europe' '/London" when you construct your pandas objects instead ' - 'of passing a timezone object. See ' - 'https://github.com/pandas-dev/pandas/pull/7362') - return 'dateutil/' + tz._filename + "of passing a timezone object. See " + "https://github.com/pandas-dev/pandas/pull/7362") + return "dateutil/" + tz._filename else: # tz is a pytz timezone or unknown. try: @@ -152,19 +152,19 @@ cpdef inline tzinfo maybe_get_tz(object tz): it to construct a timezone object. Otherwise, just return tz. """ if isinstance(tz, str): - if tz == 'tzlocal()': + if tz == "tzlocal()": tz = _dateutil_tzlocal() - elif tz.startswith('dateutil/'): + elif tz.startswith("dateutil/"): zone = tz[9:] tz = dateutil_gettz(zone) # On Python 3 on Windows, the filename is not always set correctly. - if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: + if isinstance(tz, _dateutil_tzfile) and ".tar.gz" in tz._filename: tz._filename = zone - elif tz[0] in {'-', '+'}: + elif tz[0] in {"-", "+"}: hours = int(tz[0:3]) minutes = int(tz[0] + tz[4:6]) tz = timezone(timedelta(hours=hours, minutes=minutes)) - elif tz[0:4] in {'UTC-', 'UTC+'}: + elif tz[0:4] in {"UTC-", "UTC+"}: hours = int(tz[3:6]) minutes = int(tz[3] + tz[7:9]) tz = timezone(timedelta(hours=hours, minutes=minutes)) @@ -211,16 +211,16 @@ cdef inline object tz_cache_key(tzinfo tz): if isinstance(tz, _pytz_BaseTzInfo): return tz.zone elif isinstance(tz, _dateutil_tzfile): - if '.tar.gz' in tz._filename: - raise ValueError('Bad tz filename. Dateutil on python 3 on ' - 'windows has a bug which causes tzfile._filename ' - 'to be the same for all timezone files. Please ' - 'construct dateutil timezones implicitly by ' + if ".tar.gz" in tz._filename: + raise ValueError("Bad tz filename. Dateutil on python 3 on " + "windows has a bug which causes tzfile._filename " + "to be the same for all timezone files. Please " + "construct dateutil timezones implicitly by " 'passing a string like "dateutil/Europe/London" ' - 'when you construct your pandas objects instead ' - 'of passing a timezone object. See ' - 'https://github.com/pandas-dev/pandas/pull/7362') - return 'dateutil' + tz._filename + "when you construct your pandas objects instead " + "of passing a timezone object. 
See " + "https://github.com/pandas-dev/pandas/pull/7362") + return "dateutil" + tz._filename else: return None @@ -276,7 +276,7 @@ cdef int64_t[::1] unbox_utcoffsets(object transinfo): int64_t[::1] arr sz = len(transinfo) - arr = np.empty(sz, dtype='i8') + arr = np.empty(sz, dtype="i8") for i in range(sz): arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000 @@ -312,35 +312,35 @@ cdef object get_dst_info(tzinfo tz): if cache_key not in dst_cache: if treat_tz_as_pytz(tz): - trans = np.array(tz._utc_transition_times, dtype='M8[ns]') - trans = trans.view('i8') + trans = np.array(tz._utc_transition_times, dtype="M8[ns]") + trans = trans.view("i8") if tz._utc_transition_times[0].year == 1: trans[0] = NPY_NAT + 1 deltas = unbox_utcoffsets(tz._transition_info) - typ = 'pytz' + typ = "pytz" elif treat_tz_as_dateutil(tz): if len(tz._trans_list): # get utc trans times trans_list = _get_utc_trans_times_from_dateutil_tz(tz) trans = np.hstack([ - np.array([0], dtype='M8[s]'), # place holder for 1st item - np.array(trans_list, dtype='M8[s]')]).astype( - 'M8[ns]') # all trans listed - trans = trans.view('i8') + np.array([0], dtype="M8[s]"), # place holder for 1st item + np.array(trans_list, dtype="M8[s]")]).astype( + "M8[ns]") # all trans listed + trans = trans.view("i8") trans[0] = NPY_NAT + 1 # deltas deltas = np.array([v.offset for v in ( - tz._ttinfo_before,) + tz._trans_idx], dtype='i8') + tz._ttinfo_before,) + tz._trans_idx], dtype="i8") deltas *= 1_000_000_000 - typ = 'dateutil' + typ = "dateutil" elif is_fixed_offset(tz): trans = np.array([NPY_NAT + 1], dtype=np.int64) deltas = np.array([tz._ttinfo_std.offset], - dtype='i8') * 1_000_000_000 - typ = 'fixed' + dtype="i8") * 1_000_000_000 + typ = "fixed" else: # 2018-07-12 this is not reached in the tests, and this case # is not handled in any of the functions that call @@ -367,8 +367,8 @@ def infer_tzinfo(datetime start, datetime end): if start is not None and end is not None: tz = start.tzinfo if not tz_compare(tz, end.tzinfo): - raise AssertionError(f'Inputs must both have the same timezone, ' - f'{tz} != {end.tzinfo}') + raise AssertionError(f"Inputs must both have the same timezone, " + f"{tz} != {end.tzinfo}") elif start is not None: tz = start.tzinfo elif end is not None: diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index afdf6d3d9b001..f74c72dc4e35c 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -248,9 +248,9 @@ timedelta-like} # silence false-positive compiler warning ambiguous_array = np.empty(0, dtype=bool) if isinstance(ambiguous, str): - if ambiguous == 'infer': + if ambiguous == "infer": infer_dst = True - elif ambiguous == 'NaT': + elif ambiguous == "NaT": fill = True elif isinstance(ambiguous, bool): is_dst = True @@ -258,23 +258,23 @@ timedelta-like} ambiguous_array = np.ones(len(vals), dtype=bool) else: ambiguous_array = np.zeros(len(vals), dtype=bool) - elif hasattr(ambiguous, '__iter__'): + elif hasattr(ambiguous, "__iter__"): is_dst = True if len(ambiguous) != len(vals): raise ValueError("Length of ambiguous bool-array must be " "the same size as vals") ambiguous_array = np.asarray(ambiguous, dtype=bool) - if nonexistent == 'NaT': + if nonexistent == "NaT": fill_nonexist = True - elif nonexistent == 'shift_forward': + elif nonexistent == "shift_forward": shift_forward = True - elif nonexistent == 'shift_backward': + elif nonexistent == "shift_backward": shift_backward = True elif PyDelta_Check(nonexistent): from .timedeltas 
import delta_to_nanoseconds shift_delta = delta_to_nanoseconds(nonexistent, reso=creso) - elif nonexistent not in ('raise', None): + elif nonexistent not in ("raise", None): msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " "shift_backwards} or a timedelta object") raise ValueError(msg) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 702706f00455b..57ef3601b7461 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1158,11 +1158,11 @@ cdef enum InterpolationType: interpolation_types = { - 'linear': LINEAR, - 'lower': LOWER, - 'higher': HIGHER, - 'nearest': NEAREST, - 'midpoint': MIDPOINT, + "linear": LINEAR, + "lower": LOWER, + "higher": HIGHER, + "nearest": NEAREST, + "midpoint": MIDPOINT, } @@ -1419,7 +1419,7 @@ def roll_apply(object obj, # ndarray input if raw and not arr.flags.c_contiguous: - arr = arr.copy('C') + arr = arr.copy("C") counts = roll_sum(np.isfinite(arr).astype(float), start, end, minp) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 465865dec23c4..02934346130a5 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -53,16 +53,16 @@ def calculate_variable_window_bounds( Py_ssize_t i, j if num_values <= 0: - return np.empty(0, dtype='int64'), np.empty(0, dtype='int64') + return np.empty(0, dtype="int64"), np.empty(0, dtype="int64") # default is 'right' if closed is None: - closed = 'right' + closed = "right" - if closed in ['right', 'both']: + if closed in ["right", "both"]: right_closed = True - if closed in ['left', 'both']: + if closed in ["left", "both"]: left_closed = True # GH 43997: @@ -76,9 +76,9 @@ def calculate_variable_window_bounds( if index[num_values - 1] < index[0]: index_growth_sign = -1 - start = np.empty(num_values, dtype='int64') + start = np.empty(num_values, dtype="int64") start.fill(-1) - end = np.empty(num_values, dtype='int64') + end = np.empty(num_values, dtype="int64") end.fill(-1) start[0] = 0 diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index cd42b08a03474..fbd08687d7c82 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -89,14 +89,14 @@ def convert_json_to_lines(arr: str) -> str: unsigned char val, newline, comma, left_bracket, right_bracket, quote unsigned char backslash - newline = ord('\n') - comma = ord(',') - left_bracket = ord('{') - right_bracket = ord('}') + newline = ord("\n") + comma = ord(",") + left_bracket = ord("{") + right_bracket = ord("}") quote = ord('"') - backslash = ord('\\') + backslash = ord("\\") - narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() + narr = np.frombuffer(arr.encode("utf-8"), dtype="u1").copy() length = narr.shape[0] for i in range(length): val = narr[i] @@ -114,7 +114,7 @@ def convert_json_to_lines(arr: str) -> str: if not in_quotes: num_open_brackets_seen -= 1 - return narr.tobytes().decode('utf-8') + '\n' # GH:36888 + return narr.tobytes().decode("utf-8") + "\n" # GH:36888 # stata, pytables diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 8c13566c656b7..7d0f549a2f976 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -343,7 +343,7 @@ cdef class Parser: self.bit_offset = self.parser._page_bit_offset self.subheader_pointer_length = self.parser._subheader_pointer_length self.is_little_endian = parser.byte_order == "<" - self.column_types = np.empty(self.column_count, dtype='int64') + self.column_types = np.empty(self.column_count, dtype="int64") 
# page indicators self.update_next_page() @@ -352,9 +352,9 @@ cdef class Parser: # map column types for j in range(self.column_count): - if column_types[j] == b'd': + if column_types[j] == b"d": self.column_types[j] = column_type_decimal - elif column_types[j] == b's': + elif column_types[j] == b"s": self.column_types[j] = column_type_string else: raise ValueError(f"unknown column type: {self.parser.columns[j].ctype}")
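# Closing aside: every hunk above is the same mechanical rewrite, with
# single-quoted string literals becoming double-quoted ones (prefixed
# b''/f'' literals included) and no change in runtime behavior. Below is a
# minimal, hypothetical sketch of how such literals could be flagged with
# the stdlib tokenizer; it is not how the real linter implements the check.
import io
import tokenize

def find_single_quoted(source: str):
    """Yield (row, col, text) for non-triple-quoted single-quote literals."""
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if tok.type == tokenize.STRING:
            body = tok.string.lstrip("bBfFrRuU")  # skip string prefixes
            if body.startswith("'") and not body.startswith("'''"):
                yield (*tok.start, tok.string)

sample = 'x = \'int64\'\ny = "ok"\n'
print(list(find_single_quoted(sample)))  # [(1, 4, "'int64'")]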