Skip to content

Commit

Permalink
Rewrite logic for inline formatting (#5)
Browse files Browse the repository at this point in the history
This commit simplifies the convoluted method in which inline
formatting was handled.

By splitting the overlapping inline formatting intervals into
discrete slices, each of which signifies how that slice should be
styled, the logic for inline formatting can be simplified to a single
loop instead of the recursive series of checks and dozens of while loops.

This improves performance and code clarity, and fixes some
long-standing bugs in the renderer.

The caveat, however, is that the resulting HTML may now be much
larger than before. This should be addressed as soon as
possible.
  • Loading branch information
syeopite authored Jan 8, 2024
1 parent fbe3890 commit e72e22a
Show file tree
Hide file tree
Showing 10 changed files with 552 additions and 737 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ classifiers = [
"Topic :: Software Development :: Libraries :: Python Modules",
]
requires-python = ">=3.10"
dependencies = ["dominate==2.8.0"]
dependencies = ["dominate==2.8.0", "intervaltree==3.1.0"]

[project.optional-dependencies]
dev = ["pytest==7.4.2", "prettyprinter==0.18.0"]
Expand Down
322 changes: 39 additions & 283 deletions src/npf_renderer/format/inline.py

Large diffs are not rendered by default.

47 changes: 22 additions & 25 deletions src/npf_renderer/objects/inline.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Objects storing data for inline formatting used in NPF's Text Content Block"""

import enum
from typing import NamedTuple, Union
from typing import NamedTuple, Union, Sequence


class FMTTypes(enum.Enum):
Expand All @@ -13,51 +13,48 @@ class FMTTypes(enum.Enum):
MENTION = 5
COLOR = 6

TOTAL_OVERLAP_PACKAGE = 7


class Standard(NamedTuple):
class Instruction(NamedTuple):
"""A tuple storing data on various inline formatting options"""
type: FMTTypes
start: int
end: int
type_: FMTTypes

def __lt__(self, other):
return self.type_.value < other.type_.value

class Link(NamedTuple):

class LinkInstruction(NamedTuple):
"""A tuple storing data on formatting an inline link"""
type: FMTTypes
start: int
end: int
type_: FMTTypes
url: str

def __lt__(self, other):
return self.type_.value < other.type_.value

class Mention(NamedTuple):
class MentionInstruction(NamedTuple):
"""A tuple storing data on formatting an inline mention of a blog"""
type: FMTTypes
start: int
end: int
type_: FMTTypes

blog_name: str
blog_url: str
blog_uuid: str

def __lt__(self, other):
return self.type_.value < other.type_.value


class Color(NamedTuple):
class ColorInstruction(NamedTuple):
"""A tuple storing data on formatting colored text"""
type: FMTTypes
start: int
end: int
type_: FMTTypes
hex: str

def __lt__(self, other):
return self.type_.value < other.type_.value

class TotalOverlaps(NamedTuple):
"""A tuple storing data on formatting operations that overlaps from start to finish

This allows for easily constructing the nested HTML tags that comes out of this.
"""
type: list[Union[Standard, Link, Mention, Color]]
class StyleInterval(NamedTuple):
start: int
end: int
instructions: Sequence[Union[Instruction, LinkInstruction, MentionInstruction, ColorInstruction]]


INLINE_FMT_TYPES = Union[Standard, Link, Mention, Color, TotalOverlaps]
INLINE_FMT_TYPES = Union[Instruction, LinkInstruction, MentionInstruction, ColorInstruction]
108 changes: 59 additions & 49 deletions src/npf_renderer/parse/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"""

import intervaltree

from . import misc
from .. import helpers
from ..objects import inline, text_block, image, link_block, video_block , unsupported
Expand Down Expand Up @@ -104,79 +106,87 @@ def create_text_block(text_, subtype_, inline_formatting_, nest_=None):
return create_text_block(text, subtype, inline_formats, nest_=nest_array)

@staticmethod
def route_inline_format(inline_format, start, end):
def route_inline_format(inline_format):
inline_type = getattr(inline.FMTTypes, inline_format["type"].upper())

match inline_type:
case (inline.FMTTypes.BOLD | inline.FMTTypes.ITALIC |
inline.FMTTypes.STRIKETHROUGH | inline.FMTTypes.SMALL):
return inline.Standard(
start=start,
end=end,
type=inline_type
return inline.Instruction(
type_=inline_type
)
case inline.FMTTypes.LINK:
return inline.Link(
start=start,
end=end,
type=inline_type,
return inline.LinkInstruction(
type_=inline_type,
url=inline_format["url"]
)
case inline.FMTTypes.MENTION:
blog = inline_format["blog"]
return inline.Mention(
start=start,
end=end,
type=inline_type,
return inline.MentionInstruction(
type_=inline_type,

blog_name=blog["name"],
blog_uuid=blog["uuid"],
blog_url=blog["url"]
)
case inline.FMTTypes.COLOR:
return inline.Color(
start=start,
end=end,
type=inline_type,

return inline.ColorInstruction(
type_=inline_type,
hex=inline_format["hex"],
)

def _parse_inline_text(self, inline_formatting):
def _parse_inline_text(self, raw_inline_formatting):
"""Parses the inline formatting of a content block into an array of inline fmt objects"""
inline_formats = []
inline_formatting_iter = helpers.CursorIterator(inline_formatting)
while not inline_formatting_iter._at_end:
inline_formatting_iter.next()

inline_format = inline_formatting_iter.current
start, end = inline_format["start"], inline_format["end"]
current_parsed_inline_fmt = self.route_inline_format(inline_format, start, end)

overlapping_formats = []
while peek := inline_formatting_iter.peek():
p_start, p_end = peek["start"], peek["end"]

if start == p_start and end == p_end:
overlapping_formats.append(self.route_inline_format(peek, p_start, p_end))
inline_formatting_iter.next()

# The Interval Tree is needed to convert NPF inline fmt intervals
# into discrete non-overlapping chunks.
#
# This is to simplify the final formatting operation.
#
# For instance:
#
# {'start': 1, 'end': 5, 'type': 'bold'} and {'start': 2, 'end': 7, 'type': 'italic'}
#
# Would be converted to something like:
# [1, 2, [bold]], [2, 5, [bold, italic]], [5, 7, [italic]]
#

inline_format_intervals = intervaltree.IntervalTree()

# Insert
for raw_inline in raw_inline_formatting:
start = raw_inline["start"]
end = raw_inline["end"]

inline_format_intervals[start:end] = self.route_inline_format(raw_inline)

inline_format_intervals.split_overlaps()
inline_format_intervals = sorted(inline_format_intervals.items())

# Merge duplicates
latch = None
discrete_formatting_instructions = []
for interval in inline_format_intervals:
if latch:
if latch[0] == interval.begin and latch[1] == interval.end:
latch[2].append(interval.data) # Data being a formatting instruction
else:
# Tumblr's API should return the list of inline fmts sorted. So if even one doesn't match then
# we shouldn't have any overlapping ranges with same start and end
break

if overlapping_formats:
inline_formats.append(
inline.TotalOverlaps(
type=[current_parsed_inline_fmt] + overlapping_formats,
start=start,
end=end
)
)
discrete_formatting_instructions.append(latch)
latch = [interval[0], interval[1], [interval[2]]]
else:
inline_formats.append(current_parsed_inline_fmt)
latch = [interval[0], interval[1], [interval[2]]]

# Validates that everything got added
if not discrete_formatting_instructions:
discrete_formatting_instructions.append(latch)
else:
last_raw_style = inline_format_intervals[-1]
last_processed_style = discrete_formatting_instructions[-1]

if last_raw_style.begin != last_processed_style[0] and last_raw_style.end != last_processed_style[1]:
discrete_formatting_instructions.append(latch)

# Package
inline_formats = [inline.StyleInterval(interval[0], interval[1], sorted(interval[2])) for interval in discrete_formatting_instructions]
return inline_formats

def _parse_image_block(self):
Expand Down
10 changes: 7 additions & 3 deletions tests/image_block/image_block_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,11 +456,15 @@ def format_constructor(*children):
objects.text_block.TextBlock(
text="Check out my commission from author! Please follow them here https://twitter.com/example",
inline_formatting= [
objects.inline.Link(
type=objects.inline.FMTTypes.LINK,
objects.inline.StyleInterval(
start=61,
end=88,
url="https://twitter.com/example"
instructions=[
objects.inline.LinkInstruction(
type_=objects.inline.FMTTypes.LINK,
url="https://twitter.com/example"
)
],
)
]
),
Expand Down
Loading

0 comments on commit e72e22a

Please sign in to comment.