Skip to content

Commit

Permalink
tell: do not lstrip leading formatting
Browse files Browse the repository at this point in the history
Using a custom function to do this for now. We'll want to think about
adding formatting-safe functions like this and what `choose` uses to
Sopel's API, as core plugins surely aren't the only ones to run into
such edge cases with leading/trailing formatting bytes.
  • Loading branch information
dgw committed Jul 18, 2021
1 parent 4a50c2e commit d8e60d4
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 2 deletions.
42 changes: 40 additions & 2 deletions sopel/modules/tell.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
import os
import threading
import time
import unicodedata

from sopel import plugin, tools
from sopel import formatting, plugin, tools
from sopel.config import types
from sopel.tools.time import format_time, get_timezone

Expand Down Expand Up @@ -129,6 +130,43 @@ def shutdown(bot):
pass


def _format_safe_lstrip(text):
    """``str.lstrip()`` but without eating IRC formatting.

    :param str text: text to clean
    :return: ``text`` minus its leading strippable characters, or ``''`` if
             nothing else remains
    :rtype: str
    :raises TypeError: if the passed ``text`` is not a string

    A leading character is stripped if it is in Unicode category ``Zs``
    (space separator), or if it is listed in
    ``formatting.CONTROL_NON_PRINTING`` but not in
    ``formatting.CONTROL_FORMATTING``. Stripping stops at the first character
    that matches neither rule; that character and everything after it are
    returned unchanged.

    Adapted from the ``choose`` plugin's ``_format_safe()`` function.
    """
    if not isinstance(text, str):
        raise TypeError("A string is required.")

    for pos, char in enumerate(text):
        if unicodedata.category(char) == 'Zs':
            # plain whitespace: keep skipping
            continue
        if (
            char in formatting.CONTROL_NON_PRINTING and
            char not in formatting.CONTROL_FORMATTING
        ):
            # control character that carries no formatting: keep skipping
            continue
        # first character worth keeping (visible text or IRC formatting)
        return text[pos:]

    # empty input, or nothing but strippable characters
    return ''


@plugin.command('tell', 'ask')
@plugin.nickname_command('tell', 'ask')
@plugin.example('$nickname, tell dgw he broke something again.')
Expand All @@ -142,7 +180,7 @@ def f_remind(bot, trigger):
return

tellee = trigger.group(3).rstrip('.,:;')
msg = trigger.group(2).lstrip(tellee).lstrip()
msg = _format_safe_lstrip(trigger.group(2).split(' ', 1)[1])

if not msg:
bot.reply("%s %s what?" % (verb, tellee))
Expand Down
102 changes: 102 additions & 0 deletions test/modules/test_modules_tell.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import io
import os

import pytest

from sopel import formatting
from sopel.modules import tell


Expand Down Expand Up @@ -228,3 +231,102 @@ def test_get_reminders():
'Exirel: '
'%s - 14:35:55UTC '
'<HumorBaby> tell Exirel Thanks for the review.' % today)


# Test custom lstrip implementation

# Space characters from Unicode general category "Zs" (space separator).
# Joined into a single string by test_format_safe_lstrip_basic, which expects
# tell._format_safe_lstrip() to strip all of them.
UNICODE_ZS_CATEGORY = [
'\u0020', # SPACE
'\u00A0', # NO-BREAK SPACE
'\u1680', # OGHAM SPACE MARK
'\u2000', # EN QUAD
'\u2001', # EM QUAD
'\u2002', # EN SPACE
'\u2003', # EM SPACE
'\u2004', # THREE-PER-EM SPACE
'\u2005', # FOUR-PER-EM SPACE
'\u2006', # SIX-PER-EM SPACE
'\u2007', # FIGURE SPACE
'\u2008', # PUNCTUATION SPACE
'\u2009', # THIN SPACE
'\u200A', # HAIR SPACE
'\u202F', # NARROW NO-BREAK SPACE
'\u205F', # MEDIUM MATHEMATICAL SPACE
'\u3000', # IDEOGRAPHIC SPACE
]

# (input, expected) pairs consumed by test_format_safe_lstrip_pairs: the first
# element is passed to tell._format_safe_lstrip() and the result must equal
# the second element.
SAFE_PAIRS = (
# regression checks vs. old str.lstrip()
('',
''),
('a', # one iteration of this code returned '' for one-char strings
'a'),
('aa',
'aa'),
# basic whitespace
(' leading space', # removed
'leading space'),
('trailing space ', # kept
'trailing space '),
(' leading AND trailing space ', # removed AND kept
'leading AND trailing space '),
# advanced whitespace
('\tleading tab', # removed
'leading tab'),
('trailing tab\t', # kept
'trailing tab\t'),
# whitespace inside formatting (kept)
('\x02 leading space inside formatting\x02',
'\x02 leading space inside formatting\x02'),
('\x02trailing space inside formatting \x02',
'\x02trailing space inside formatting \x02'),
('\x02 leading AND trailing inside formatting \x02',
'\x02 leading AND trailing inside formatting \x02'),
# whitespace outside formatting
(' \x02leading space outside formatting\x02', # removed
'\x02leading space outside formatting\x02'),
('\x02trailing space outside formatting\x02 ', # kept
'\x02trailing space outside formatting\x02 '),
# whitespace both inside and outside formatting
(' \x02 leading space inside AND outside\x02', # outside removed
'\x02 leading space inside AND outside\x02'),
('\x02trailing space inside AND outside \x02 ', # left alone
'\x02trailing space inside AND outside \x02 '),
(' \x02 leading AND trailing inside AND outside \x02 ', # only leading removed
'\x02 leading AND trailing inside AND outside \x02 '),
)


def test_format_safe_lstrip_basic():
    """Check that a string of only ``Zs`` whitespace strips to nothing."""
    only_whitespace = ''.join(UNICODE_ZS_CATEGORY)
    stripped = tell._format_safe_lstrip(only_whitespace)
    assert stripped == ''


def test_format_safe_lstrip_control():
    """Check which control characters survive stripping."""
    # a string of nothing but formatting characters must come back untouched
    formatting_only = ''.join(formatting.CONTROL_FORMATTING)
    assert tell._format_safe_lstrip(formatting_only) == formatting_only

    # every other non-printing control character must be stripped away
    non_formatting = [
        char
        for char in formatting.CONTROL_NON_PRINTING
        if char not in formatting.CONTROL_FORMATTING
    ]
    assert tell._format_safe_lstrip(''.join(non_formatting)) == ''


def test_format_safe_lstrip_invalid_arg():
    """Check that a non-string argument is rejected with ``TypeError``."""
    not_a_string = None
    with pytest.raises(TypeError):
        tell._format_safe_lstrip(not_a_string)


@pytest.mark.parametrize('text, cleaned', SAFE_PAIRS)
def test_format_safe_lstrip_pairs(text, cleaned):
    """Check each ``SAFE_PAIRS`` input against its expected stripped form."""
    result = tell._format_safe_lstrip(text)
    assert result == cleaned

0 comments on commit d8e60d4

Please sign in to comment.