From 439e129561dfee312c27dc0eb444d0c9396e2cda Mon Sep 17 00:00:00 2001 From: aschmitz <29508+aschmitz@users.noreply.github.com> Date: Tue, 4 Jan 2022 19:03:48 -0600 Subject: [PATCH 1/2] fix: don't wrap `overflow-wrap: anywhere` unnecessarily Previously, if `overflow-wrap: anywhere` (or `break-word`) was set on an inline element (like an `<a>` or `<span>`) whose first word occurred towards the end of a line, it would have broken that word, even if doing so wasn't necessary (and wouldn't have happened were it not for the wrapping element). This pipes through an `is_line_start` argument to `split_text_box` that only allows the break on a word if it's the first word in a line. --- tests/test_text.py | 32 ++++++++++++++++++++++++++++++++ weasyprint/layout/inline.py | 9 ++++++--- weasyprint/layout/preferred.py | 2 +- weasyprint/text/line_break.py | 9 ++++++--- 4 files changed, 45 insertions(+), 7 deletions(-) diff --git a/tests/test_text.py b/tests/test_text.py index 99ffbc072..f9be62b57 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -921,6 +921,38 @@ def test_overflow_wrap(wrap, text, test, full_text): assert full_text == lines_full_text +@assert_no_logs +@pytest.mark.parametrize('span_css, expected_lines', ( + # overflow-wrap: anywhere and break-word are only allowed to break a word + # "if there are no otherwise-acceptable break points in the line", which + # means they should not split a word if it fits cleanly into the next line. + # This can be done accidentally if it is in its own inline element. 
+ ('overflow-wrap: anywhere', ['aaa', 'bbb']), + ('overflow-wrap: break-word', ['aaa', 'bbb']), +)) +def test_wrap_overflow_word_break(span_css, expected_lines): + page, = render_pages(''' + + + aaa bbb + ''' % span_css) + html, = page.children + body, = html.children + lines = body.children + lines = [] + print(body.children) + for line in body.children: + line_text = '' + for span_box in line.children: + line_text += span_box.children[0].text + lines.append(line_text) + assert lines == expected_lines + + @assert_no_logs @pytest.mark.parametrize('wrap, text, body_width, expected_width', ( ('anywhere', 'aaaaaa', 10, 20), diff --git a/weasyprint/layout/inline.py b/weasyprint/layout/inline.py index 12f6b7322..d97dcf926 100644 --- a/weasyprint/layout/inline.py +++ b/weasyprint/layout/inline.py @@ -472,8 +472,10 @@ def split_inline_level(context, box, position_x, max_x, bottom_space, skip = skip or 0 assert skip_stack is None + is_line_start = len(line_children) == 0 new_box, skip, preserved_line_break = split_text_box( - context, box, max_x - position_x, skip) + context, box, max_x - position_x, skip, + is_line_start=is_line_start) if skip is None: resume_at = None @@ -882,7 +884,7 @@ def split_inline_box(context, box, position_x, max_x, bottom_space, skip_stack, float_widths) -def split_text_box(context, box, available_width, skip): +def split_text_box(context, box, available_width, skip, is_line_start=True): """Keep as much text as possible from a TextBox in a limited width. Try not to overflow but always have some text in ``new_box``. 
@@ -900,7 +902,8 @@ def split_text_box(context, box, available_width, skip): if font_size == 0 or not text: return None, None, False layout, length, resume_index, width, height, baseline = split_first_line( - text, box.style, context, available_width, box.justification_spacing) + text, box.style, context, available_width, box.justification_spacing, + is_line_start=is_line_start) assert resume_index != 0 # Convert ``length`` and ``resume_at`` from UTF-8 indexes in text diff --git a/weasyprint/layout/preferred.py b/weasyprint/layout/preferred.py index 124d1c242..64c29e1be 100644 --- a/weasyprint/layout/preferred.py +++ b/weasyprint/layout/preferred.py @@ -307,7 +307,7 @@ def inline_line_widths(context, box, outer, is_line_start, minimum, split_first_line( child_text[resume_index:], child.style, context, max_width, child.justification_spacing, - minimum=True)) + is_line_start=is_line_start, minimum=True)) lines.append(width) if first_line: break diff --git a/weasyprint/text/line_break.py b/weasyprint/text/line_break.py index d5ff3877a..5a387c0f8 100644 --- a/weasyprint/text/line_break.py +++ b/weasyprint/text/line_break.py @@ -302,7 +302,7 @@ def create_layout(text, style, context, max_width, justification_spacing): def split_first_line(text, style, context, max_width, justification_spacing, - minimum=False): + is_line_start=True, minimum=False): """Fit as much as possible in the available width for one line of text. Return ``(layout, length, resume_index, width, height, baseline)``. 
@@ -545,8 +545,11 @@ def split_first_line(text, style, context, max_width, justification_spacing, first_line_width, _ = line_size(first_line, style) space = max_width - first_line_width # If we can break words and the first line is too long - if space < 0 and (overflow_wrap == 'anywhere' or - (overflow_wrap == 'break-word' and not minimum)): + if space < 0 and (is_line_start and + ( + overflow_wrap == 'anywhere' or + (overflow_wrap == 'break-word' and not minimum) + )): # Is it really OK to remove hyphenation for word-break ? hyphenated = False # TODO: Modify code to preserve W3C condition: From 480a6ed3bf90d74fe246453f138a1b67ea2e22c9 Mon Sep 17 00:00:00 2001 From: aschmitz <29508+aschmitz@users.noreply.github.com> Date: Tue, 4 Jan 2022 19:08:16 -0600 Subject: [PATCH 2/2] feat: support `word-break: break-all` This adds support for the `break-all` value for `word-break`. The `keep-all` and `break-word` values are not yet supported. --- tests/test_text.py | 4 ++++ weasyprint/css/properties.py | 2 ++ weasyprint/css/validation/properties.py | 7 +++++++ weasyprint/text/line_break.py | 11 ++++++----- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/test_text.py b/tests/test_text.py index f9be62b57..8be756cc3 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -929,6 +929,10 @@ def test_overflow_wrap(wrap, text, test, full_text): # This can be done accidentally if it is in its own inline element. ('overflow-wrap: anywhere', ['aaa', 'bbb']), ('overflow-wrap: break-word', ['aaa', 'bbb']), + + # On the other hand, word-break: break-all mandates a break anywhere at the + # end of a line, even if the word could fit cleanly onto the next line. 
+ ('word-break: break-all', ['aaa b', 'bb']), )) def test_wrap_overflow_word_break(span_css, expected_lines): page, = render_pages(''' diff --git a/weasyprint/css/properties.py b/weasyprint/css/properties.py index d17e3c5b8..65cf5ff03 100644 --- a/weasyprint/css/properties.py +++ b/weasyprint/css/properties.py @@ -154,6 +154,7 @@ 'text_indent': Dimension(0, 'px'), 'text_transform': 'none', 'white_space': 'normal', + 'word_break': 'normal', 'word_spacing': 0, # computed value for 'normal' # Transforms 1 (CR): https://www.w3.org/TR/css-transforms-1/ @@ -270,6 +271,7 @@ 'visibility', 'white_space', 'widows', + 'word_break', 'word_spacing', } diff --git a/weasyprint/css/validation/properties.py b/weasyprint/css/validation/properties.py index 05ed1245a..ab3f24191 100644 --- a/weasyprint/css/validation/properties.py +++ b/weasyprint/css/validation/properties.py @@ -1179,6 +1179,13 @@ def overflow_wrap(keyword): return keyword in ('anywhere', 'normal', 'break-word') +@property() +@single_keyword +def word_break(keyword): + """``word-break`` property validation.""" + return keyword in ('normal', 'break-all') + + @property() @single_token def flex_basis(token): diff --git a/weasyprint/text/line_break.py b/weasyprint/text/line_break.py index 5a387c0f8..42e2c3407 100644 --- a/weasyprint/text/line_break.py +++ b/weasyprint/text/line_break.py @@ -545,11 +545,12 @@ def split_first_line(text, style, context, max_width, justification_spacing, first_line_width, _ = line_size(first_line, style) space = max_width - first_line_width # If we can break words and the first line is too long - if space < 0 and (is_line_start and - ( - overflow_wrap == 'anywhere' or - (overflow_wrap == 'break-word' and not minimum) - )): + if space < 0 and ((style['word_break'] == 'break-all') or + (is_line_start and + ( + overflow_wrap == 'anywhere' or + (overflow_wrap == 'break-word' and not minimum) + ))): # Is it really OK to remove hyphenation for word-break ? 
hyphenated = False # TODO: Modify code to preserve W3C condition: