Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

choose: safely handle formatted options #1965

Merged
merged 7 commits into from
Dec 10, 2020
Merged
13 changes: 13 additions & 0 deletions sopel/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,19 @@
CONTROL_REVERSE = '\x16'
"""The control code to start or end reverse-color formatting."""

# Gathers the individual formatting control codes into one collection so that
# callers can use a single membership test (``char in CONTROL_FORMATTING``)
# rather than comparing against every constant separately.
CONTROL_FORMATTING = [
    CONTROL_NORMAL,
    CONTROL_COLOR,
    CONTROL_HEX_COLOR,
    CONTROL_BOLD,
    CONTROL_ITALIC,
    CONTROL_UNDERLINE,
    CONTROL_STRIKETHROUGH,
    CONTROL_MONOSPACE,
    CONTROL_REVERSE,
]
"""A list of all control characters expected to appear as formatting."""

CONTROL_NON_PRINTING = [
'\x00',
'\x01',
Expand Down
64 changes: 62 additions & 2 deletions sopel/modules/choose.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,68 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import random
import unicodedata

from sopel import plugin
from sopel import formatting, plugin

# Remove when dropping py2 support
# On Python 2 this rebinds ``str`` to ``unicode`` so that the
# ``isinstance(text, str)`` check in ``_format_safe`` accepts text strings.
# On Python 3 the name ``unicode`` does not exist, the lookup raises
# ``NameError``, and the builtin ``str`` is left untouched.
try:
    str = unicode
except NameError:
    pass


def _format_safe(text):
"""Remove excess whitespace and terminate IRC formatting.

:param str text: text to clean of whitespace
:rtype: str
:raises TypeError: if the passed ``text`` is not a string

Our own take on ``str.strip()`` that skips stripping off IRC formatting
and makes sure any formatting codes are closed if necessary.
"""
if not isinstance(text, str):
raise TypeError("A string is required.")
elif not text:
# unnecessary optimization
return ''

start = end = 0
Exirel marked this conversation as resolved.
Show resolved Hide resolved

# strip left
pos = 0
while pos < len(text):
is_whitespace = unicodedata.category(text[pos]) == 'Zs'
is_non_printing = (
text[pos] in formatting.CONTROL_NON_PRINTING and
text[pos] not in formatting.CONTROL_FORMATTING
)
if not is_whitespace and not is_non_printing:
start = pos
break
pos += 1

# strip right
pos = len(text) - 1
while pos >= 0:
is_whitespace = unicodedata.category(text[pos]) == 'Zs'
is_non_printing = (
text[pos] in formatting.CONTROL_NON_PRINTING and
text[pos] not in formatting.CONTROL_FORMATTING
)
if not is_whitespace and not is_non_printing:
end = pos + 1
break
pos -= 1

# build the final string
safe = text[start:end]
if any(c in safe for c in formatting.CONTROL_FORMATTING):
# if it includes IRC formatting, append reset character just in case
safe += formatting.CONTROL_NORMAL

return safe


@plugin.command('choose', 'choice', 'ch')
Expand All @@ -40,7 +100,7 @@ def choose(bot, trigger):
choices = trigger.group(2).split(delim)
if len(choices) > 1:
break
choices = [choice.strip() for choice in choices]
choices = [_format_safe(choice) for choice in choices]
pick = random.choice(choices)

# Always use a comma in the output
Expand Down
148 changes: 148 additions & 0 deletions test/modules/test_modules_choose.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# coding=utf-8
"""Tests for Sopel's ``choose`` plugin"""
from __future__ import absolute_import, division, print_function, unicode_literals

import pytest

from sopel import formatting
from sopel.modules import choose


# Space characters from Unicode general category ``Zs`` (space separator).
# ``test_format_safe_basic`` joins them into a single string and expects
# ``_format_safe`` to strip every one of them.
UNICODE_ZS_CATEGORY = [
    '\u0020',  # SPACE
    '\u00A0',  # NO-BREAK SPACE
    '\u1680',  # OGHAM SPACE MARK
    '\u2000',  # EN QUAD
    '\u2001',  # EM QUAD
    '\u2002',  # EN SPACE
    '\u2003',  # EM SPACE
    '\u2004',  # THREE-PER-EM SPACE
    '\u2005',  # FOUR-PER-EM SPACE
    '\u2006',  # SIX-PER-EM SPACE
    '\u2007',  # FIGURE SPACE
    '\u2008',  # PUNCTUATION SPACE
    '\u2009',  # THIN SPACE
    '\u200A',  # HAIR SPACE
    '\u202F',  # NARROW NO-BREAK SPACE
    '\u205F',  # MEDIUM MATHEMATICAL SPACE
    '\u3000',  # IDEOGRAPHIC SPACE
]

# Each entry is ``(raw input, expected output of _format_safe)``.
# ``\x02`` is the IRC bold code, ``\x1d`` the italic code, and ``\x0f`` the
# reset code that is appended whenever any formatting code is present.
SAFE_PAIRS = (
    # regression checks vs. old string.strip()
    ('',
     ''),
    ('a',  # one iteration of this code returned '' for one-char strings
     'a'),
    ('aa',
     'aa'),
    ('\x02',  # special case of 'a', one-char string that needs reset
     '\x02\x0f'),
    # basic whitespace (dropped)
    (' leading space',
     'leading space'),
    ('trailing space ',
     'trailing space'),
    (' leading AND trailing space ',
     'leading AND trailing space'),
    # advanced whitespace (dropped)
    ('\tleading tab',
     'leading tab'),
    ('trailing tab\t',
     'trailing tab'),
    # whitespace inside formatting (kept)
    ('\x02 leading space inside formatting\x02',
     '\x02 leading space inside formatting\x02\x0f'),
    ('\x02trailing space inside formatting \x02',
     '\x02trailing space inside formatting \x02\x0f'),
    ('\x02 leading AND trailing inside formatting \x02',
     '\x02 leading AND trailing inside formatting \x02\x0f'),
    # whitespace outside formatting (dropped)
    (' \x02leading space outside formatting\x02',
     '\x02leading space outside formatting\x02\x0f'),
    ('\x02trailing space outside formatting\x02 ',
     '\x02trailing space outside formatting\x02\x0f'),
    # whitespace both inside and outside formatting
    # (respectively kept and dropped)
    (' \x02 leading space inside AND outside\x02',
     '\x02 leading space inside AND outside\x02\x0f'),
    ('\x02trailing space inside AND outside \x02 ',
     '\x02trailing space inside AND outside \x02\x0f'),
    (' \x02 leading AND trailing inside AND outside \x02 ',
     '\x02 leading AND trailing inside AND outside \x02\x0f'),
    # unmatched formatting
    ('\x02unterminated bold',
     '\x02unterminated bold\x0f'),
    ('only last word \x02bold',
     'only last word \x02bold\x0f'),
    # the italic span uses the italic code (\x1d); \x03 is the color code
    (' leading space, \x1ditalic\x1d, and \x02bold with extra spaces ',
     'leading space, \x1ditalic\x1d, and \x02bold with extra spaces\x0f'),
)


def test_format_safe_basic():
    """Check that a string made only of Unicode spaces is emptied."""
    only_spaces = ''.join(UNICODE_ZS_CATEGORY)
    stripped = choose._format_safe(only_spaces)
    assert stripped == ''


def test_format_safe_control():
    """Check which non-printing control characters survive sanitization."""
    formatting_codes = ''.join(formatting.CONTROL_FORMATTING)
    other_controls = ''.join(
        char
        for char in formatting.CONTROL_NON_PRINTING
        if char not in formatting.CONTROL_FORMATTING
    )

    # every formatting code is kept, followed by an appended reset
    sanitized_codes = choose._format_safe(formatting_codes)
    assert sanitized_codes == formatting_codes + '\x0f'

    # control characters that are not formatting codes are all removed
    sanitized_others = choose._format_safe(other_controls)
    assert sanitized_others == ''


def test_format_safe_invalid_arg():
    """Check that a non-string argument is rejected with ``TypeError``."""
    not_a_string = None
    with pytest.raises(TypeError):
        choose._format_safe(not_a_string)


@pytest.mark.parametrize('text, cleaned', SAFE_PAIRS)
def test_format_safe_pairs(text, cleaned):
    """Check each raw input against its expected sanitized form."""
    sanitized = choose._format_safe(text)
    assert sanitized == cleaned


# --- Insert reset only when necessary | Expected to fail --- #

# Each entry is ``(raw input, desired output)`` for a future ``_format_safe``
# that would not append a reset (``\x0f``) when the bold code (``\x02``) is
# already closed by a matching ``\x02``. The current implementation appends
# the reset whenever any formatting code is present, so these expectations do
# not hold yet and the test using them is marked ``xfail(strict=True)``.
EFFICIENT_PAIRS = (
    # whitespace inside formatting (kept)
    ('\x02 leading space inside formatting\x02',
     '\x02 leading space inside formatting\x02'),
    ('\x02trailing space inside formatting \x02',
     '\x02trailing space inside formatting \x02'),
    ('\x02 leading AND trailing inside formatting \x02',
     '\x02 leading AND trailing inside formatting \x02'),
    # whitespace outside formatting (dropped)
    (' \x02leading space outside formatting\x02',
     '\x02leading space outside formatting\x02'),
    ('\x02trailing space outside formatting\x02 ',
     '\x02trailing space outside formatting\x02'),
    # whitespace both inside and outside formatting
    # (respectively kept and dropped)
    (' \x02 leading space inside AND outside\x02',
     '\x02 leading space inside AND outside\x02'),
    ('\x02trailing space inside AND outside \x02 ',
     '\x02trailing space inside AND outside \x02'),
    (' \x02 leading AND trailing inside AND outside \x02 ',
     '\x02 leading AND trailing inside AND outside \x02'),
)


@pytest.mark.parametrize('text, cleaned', EFFICIENT_PAIRS)
@pytest.mark.xfail(strict=True)
def test_format_safe_future(text, cleaned):
    """Check the not-yet-implemented "reset only when needed" behavior."""
    sanitized = choose._format_safe(text)
    assert sanitized == cleaned
2 changes: 1 addition & 1 deletion test/modules/test_modules_isup.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# coding=utf-8
"""Tests for Sopel's ``remind`` plugin"""
"""Tests for Sopel's ``isup`` plugin"""
dgw marked this conversation as resolved.
Show resolved Hide resolved
from __future__ import absolute_import, division, print_function, unicode_literals

import pytest
Expand Down