r1chardj0n3s · KRRT7 · Sep 10, 2024 · Sep 11, 2024
diff --git a/parse.py b/parse.py
@@ -8,7 +8,6 @@
 from datetime import timedelta
 from datetime import tzinfo
 from decimal import Decimal
-from functools import partial
 
 
 __version__ = "1.20.2"
@@ -599,27 +598,31 @@ def _regex_replace(self, match):
  return "\\" + match.group(1)
 
  def _generate_expression(self):
- # turn my _format attribute into the _expression attribute
- e = []
- for part in PARSE_RE.split(self._format):
- if not part:
- continue
- elif part == "{{":
- e.append(r"\{")
- elif part == "}}":
- e.append(r"\}")
- elif part[0] == "{" and part[-1] == "}":
- # this will be a braces-delimited field to handle
- e.append(self._handle_field(part))
- else:
- # just some text to match
- e.append(REGEX_SAFETY.sub(self._regex_replace, part))
+ parts = PARSE_RE.split(self._format)
+ e = [
+ (
+ self._handle_field(part)
+ if part and part[0] == "{" and part[-1] == "}"
+ else (
+ REGEX_SAFETY.sub(self._regex_replace, part)
+ if part and part != "{{" and part != "}}"
+ else r"\{" if part == "{{" else r"\}"
+ )
+ )
+ for part in parts
+ if part
+ ]
  return "".join(e)
 
  def _to_group_name(self, field):
  # return a version of field which can be used as capture group, even
  # though it might contain '.'
- group = field.replace(".", "_").replace("[", "_").replace("]", "_").replace("-", "_")
+ group = (
+ field.replace(".", "_")
+ .replace("[", "_")
+ .replace("]", "_")
+ .replace("-", "_")
+ )
 
  # make sure we don't collide ("a.b" colliding with "a_b")
  n = 1
@@ -640,217 +643,89 @@ def _to_group_name(self, field):
  return group
 
  def _handle_field(self, field):
- # first: lose the braces
  field = field[1:-1]
+ name, _, format = field.partition(":")
 
- # now figure whether this is an anonymous or named field, and whether
- # there's any format specification
- format = ""
-
- if ":" in field:
- name, format = field.split(":", 1)
- else:
- name = field
-
- # This *should* be more flexible, but parsing complicated structures
- # out of the string is hard (and not necessarily useful) ... and I'm
- # being lazy. So for now `identifier` is "anything starting with a
- # letter" and digit args don't get attribute or element stuff.
  if name and name[0].isalpha():
  if name in self._name_to_group_map:
  if self._name_types[name] != format:
  raise RepeatedNameError(
- 'field type %r for field "%s" '
- "does not match previous seen type %r"
- % (format, name, self._name_types[name])
+ f'field type {format!r} for field "{name}" does not match previous seen type {self._name_types[name]!r}'
  )
  group = self._name_to_group_map[name]
- # match previously-seen value
- return r"(?P=%s)" % group
+ return rf"(?P={group})"
  else:
  group = self._to_group_name(name)
  self._name_types[name] = format
- self._named_fields.append(group)
- # this will become a group, which must not contain dots
- wrap = r"(?P<%s>%%s)" % group
+ self._named_fields.append(group)
+ wrap = rf"(?P<{group}>%s)"
  else:
  self._fixed_fields.append(self._group_index)
  wrap = r"(%s)"
  group = self._group_index
 
- # simplest case: no type specifier ({} or {name})
  if not format:
  self._group_index += 1
  return wrap % r".+?"
 
- # decode the format specification
  format = extract_format(format, self._extra_types)
-
- # figure type conversions, if any
  type = format["type"]
  is_numeric = type and type in "n%fegdobx"
  conv = self._type_conversions
+
  if type in self._extra_types:
  type_converter = self._extra_types[type]
  s = getattr(type_converter, "pattern", r".+?")
- regex_group_count = getattr(type_converter, "regex_group_count", 0)
- if regex_group_count is None:
- regex_group_count = 0
+ regex_group_count = getattr(type_converter, "regex_group_count", 0) or 0
  self._group_index += regex_group_count
  conv[group] = convert_first(type_converter)
- elif type == "n":
- s = r"\d{1,3}([,.]\d{3})*"
- self._group_index += 1
- conv[group] = int_convert(10)
- elif type == "b":
- s = r"(0[bB])?[01]+"
- conv[group] = int_convert(2)
- self._group_index += 1
- elif type == "o":
- s = r"(0[oO])?[0-7]+"
- conv[group] = int_convert(8)
- self._group_index += 1
- elif type == "x":
- s = r"(0[xX])?[0-9a-fA-F]+"
- conv[group] = int_convert(16)
- self._group_index += 1
- elif type == "%":
- s = r"\d+(\.\d+)?%"
- self._group_index += 1
- conv[group] = percentage
- elif type == "f":
- s = r"\d*\.\d+"
- conv[group] = convert_first(float)
- elif type == "F":
- s = r"\d*\.\d+"
- conv[group] = convert_first(Decimal)
- elif type == "e":
- s = r"\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF"
- conv[group] = convert_first(float)
- elif type == "g":
- s = r"\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF"
- self._group_index += 2
- conv[group] = convert_first(float)
- elif type == "d":
- if format.get("width"):
- width = r"{1,%s}" % int(format["width"])
- else:
- width = "+"
- s = r"\d{w}|[-+ ]?0[xX][0-9a-fA-F]{w}|[-+ ]?0[bB][01]{w}|[-+ ]?0[oO][0-7]{w}".format(
- w=width
- )
- conv[group] = int_convert()
- # do not specify number base, determine it automatically
- elif any(k in type for k in dt_format_to_regex):
- s = get_regex_for_datetime_format(type)
- conv[group] = partial(strf_date_convert, type=type)
- elif type == "ti":
- s = r"(\d{4}-\d\d-\d\d)((\s+|T)%s)?(Z|\s*[-+]\d\d:?\d\d)?" % TIME_PAT
- n = self._group_index
- conv[group] = partial(date_convert, ymd=n + 1, hms=n + 4, tz=n + 7)
- self._group_index += 7
- elif type == "tg":
- s = r"(\d{1,2}[-/](\d{1,2}|%s)[-/]\d{4})(\s+%s)?%s?%s?"
- s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
- n = self._group_index
- conv[group] = partial(
- date_convert, dmy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
- )
- self._group_index += 9
- elif type == "ta":
- s = r"((\d{1,2}|%s)[-/]\d{1,2}[-/]\d{4})(\s+%s)?%s?%s?"
- s %= (ALL_MONTHS_PAT, TIME_PAT, AM_PAT, TZ_PAT)
- n = self._group_index
- conv[group] = partial(
- date_convert, mdy=n + 1, hms=n + 5, am=n + 8, tz=n + 9
- )
- self._group_index += 9
- elif type == "te":
- # this will allow microseconds through if they're present, but meh
- s = r"(%s,\s+)?(\d{1,2}\s+%s\s+\d{4})\s+%s%s"
- s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT, TZ_PAT)
- n = self._group_index
- conv[group] = partial(date_convert, dmy=n + 3, hms=n + 5, tz=n + 8)
- self._group_index += 8
- elif type == "th":
- # slight flexibility here from the stock Apache format
- s = r"(\d{1,2}[-/]%s[-/]\d{4}):%s%s" % (MONTHS_PAT, TIME_PAT, TZ_PAT)
- n = self._group_index
- conv[group] = partial(date_convert, dmy=n + 1, hms=n + 3, tz=n + 6)
- self._group_index += 6
- elif type == "tc":
- s = r"(%s)\s+%s\s+(\d{1,2})\s+%s\s+(\d{4})"
- s %= (DAYS_PAT, MONTHS_PAT, TIME_PAT)
- n = self._group_index
- conv[group] = partial(date_convert, d_m_y=(n + 4, n + 3, n + 8), hms=n + 5)
- self._group_index += 8
- elif type == "tt":
- s = r"%s?%s?%s?" % (TIME_PAT, AM_PAT, TZ_PAT)
- n = self._group_index
- conv[group] = partial(date_convert, hms=n + 1, am=n + 4, tz=n + 5)
- self._group_index += 5
- elif type == "ts":
- s = r"%s(\s+)(\d+)(\s+)(\d{1,2}:\d{1,2}:\d{1,2})?" % MONTHS_PAT
- n = self._group_index
- conv[group] = partial(date_convert, mm=n + 1, dd=n + 3, hms=n + 5)
- self._group_index += 5
- elif type == "l":
- s = r"[A-Za-z]+"
- elif type:
- s = r"\%s+" % type
- elif format.get("precision"):
- if format.get("width"):
- s = r".{%s,%s}?" % (format["width"], format["precision"])
- else:
- s = r".{1,%s}?" % format["precision"]
- elif format.get("width"):
- s = r".{%s,}?" % format["width"]
  else:
- s = r".+?"
+ type_patterns = {
+ "n": (r"\d{1,3}([,.]\d{3})*", int_convert(10)),
+ "b": (r"(0[bB])?[01]+", int_convert(2)),
+ "o": (r"(0[oO])?[0-7]+", int_convert(8)),
+ "x": (r"(0[xX])?[0-9a-fA-F]+", int_convert(16)),
+ "%": (r"\d+(\.\d+)?%", percentage),
+ "f": (r"\d*\.\d+", convert_first(float)),
+ "F": (r"\d*\.\d+", convert_first(Decimal)),
+ "e": (
+ r"\d*\.\d+[eE][-+]?\d+|nan|NAN|[-+]?inf|[-+]?INF",
+ convert_first(float),
+ ),
+ "g": (
+ r"\d+(\.\d+)?([eE][-+]?\d+)?|nan|NAN|[-+]?inf|[-+]?INF",
+ convert_first(float),
+ ),
+ }
+ s, conv[group] = type_patterns.get(type, (r".+?", None))
+ self._group_index += 1
 
  align = format["align"]
- fill = format["fill"]
+ fill = format["fill"] or " "
+
+ if is_numeric and align == "=":
+ fill = fill or "0"
+ s = rf"{fill}*{s}"
 
- # handle some numeric-specific things like fill and sign
  if is_numeric:
- # prefix with something (align "=" trumps zero)
- if align == "=":
- # special case - align "=" acts like the zero above but with
- # configurable fill defaulting to "0"
- if not fill:
- fill = "0"
- s = r"%s*" % fill + s
-
- # allow numbers to be prefixed with a sign
- s = r"[-+ ]?" + s
-
- if not fill:
- fill = " "
-
- # Place into a group now - this captures the value we want to keep.
- # Everything else from now is just padding to be stripped off
+ s = rf"[-+ ]?{s}"
+
  if wrap:
  s = wrap % s
  self._group_index += 1
 
  if format["width"]:
- # all we really care about is that if the format originally
- # specified a width then there will probably be padding - without
- # an explicit alignment that'll mean right alignment with spaces
- # padding
- if not align:
- align = ">"
+ align = align or ">"
 
  if fill in r".\+?*[](){}^$":
  fill = "\\" + fill
 
- # align "=" has been handled
- if align == "<":
- s = "%s%s*" % (s, fill)
- elif align == ">":
- s = "%s*%s" % (fill, s)
- elif align == "^":
- s = "%s*%s%s*" % (fill, s, fill)
+ align_patterns = {
+ "<": rf"{s}{fill}*",
+ ">": rf"{fill}*{s}",
+ "^": rf"{fill}*{s}{fill}*",
+ }
+ s = align_patterns.get(align, s)
 
  return s