Skip to content

Commit

Permalink
Refactor quoted dialogue detection (#2138)
Browse files Browse the repository at this point in the history
  • Loading branch information
vkbo authored Dec 23, 2024
2 parents cc5cd50 + 00d4c55 commit f702155
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 7 deletions.
22 changes: 17 additions & 5 deletions novelwriter/text/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@

from novelwriter import CONFIG
from novelwriter.common import compact, uniqueCompact
from novelwriter.constants import nwRegEx
from novelwriter.constants import nwRegEx, nwUnicode


class RegExPatterns:

AMBIGUOUS = (nwUnicode.U_APOS, nwUnicode.U_RSQUO)

# Static RegExes
_rxUrl = re.compile(nwRegEx.URL, re.ASCII)
_rxWords = re.compile(nwRegEx.WORDS, re.UNICODE)
Expand Down Expand Up @@ -87,16 +89,25 @@ def shortcodeValue(self) -> re.Pattern:
def dialogStyle(self) -> re.Pattern | None:
"""Dialogue detection rule based on user settings."""
if CONFIG.dialogStyle > 0:
end = "|$" if CONFIG.allowOpenDial else ""
rx = []
if CONFIG.dialogStyle in (1, 3):
qO = CONFIG.fmtSQuoteOpen.strip()[:1]
qC = CONFIG.fmtSQuoteClose.strip()[:1]
rx.append(f"(?:\\B{qO}.*?(?:{qC}\\B{end}))")
if qO == qC:
rx.append(f"(?:\\B{qO}.+?{qC}\\B)")
else:
rx.append(f"(?:{qO}[^{qO}]+{qC})")
if CONFIG.allowOpenDial:
rx.append(f"(?:{qO}.+?$)")
if CONFIG.dialogStyle in (2, 3):
qO = CONFIG.fmtDQuoteOpen.strip()[:1]
qC = CONFIG.fmtDQuoteClose.strip()[:1]
rx.append(f"(?:\\B{qO}.*?(?:{qC}\\B{end}))")
if qO == qC:
rx.append(f"(?:\\B{qO}.+?{qC}\\B)")
else:
rx.append(f"(?:{qO}[^{qO}]+{qC})")
if CONFIG.allowOpenDial:
rx.append(f"(?:{qO}.+?$)")
return re.compile("|".join(rx), re.UNICODE)
return None

Expand All @@ -106,7 +117,8 @@ def altDialogStyle(self) -> re.Pattern | None:
if CONFIG.altDialogOpen and CONFIG.altDialogClose:
qO = re.escape(compact(CONFIG.altDialogOpen))
qC = re.escape(compact(CONFIG.altDialogClose))
return re.compile(f"\\B{qO}.*?{qC}\\B", re.UNICODE)
qB = r"\B" if (qO == qC or qC in self.AMBIGUOUS) else ""
return re.compile(f"{qO}.*?{qC}{qB}", re.UNICODE)
return None


Expand Down
80 changes: 78 additions & 2 deletions tests/test_text/test_text_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,27 @@ def testTextPatterns_DialogueStyle():
# Straight double quotes are ignored
assert allMatches(regEx, "one \"two\" three") == []

# Skipping whitespace is not allowed
assert allMatches(regEx, "one\u2018two\u2019three") == []
# Check with no whitespace, single quote
assert allMatches(regEx, "one\u2018two\u2019three") == [
[("\u2018two\u2019", 3, 8)]
]
assert allMatches(regEx, "one\u2018two\u2019 three") == [
[("\u2018two\u2019", 3, 8)]
]

# Check with no whitespace, double quote
assert allMatches(regEx, "one\u201ctwo\u201dthree") == [
[("\u201ctwo\u201d", 3, 8)]
]
assert allMatches(regEx, "one\u201ctwo\u201d three") == [
[("\u201ctwo\u201d", 3, 8)]
]

# Check with apostrophe
assert allMatches(regEx, "one \u2018two\u2019s three\u2019, \u2018four\u2019 five") == [
[("\u2018two\u2019s three\u2019", 4, 17)],
[("\u2018four\u2019", 19, 25)],
]

# Open
# ====
Expand All @@ -333,6 +352,63 @@ def testTextPatterns_DialogueStyle():
]


@pytest.mark.core
def testTextPatterns_DialoguePlain():
    """Check dialogue detection when straight quotes are configured."""
    # Configure the same straight symbol for opening and closing quotes
    CONFIG.fmtSQuoteOpen = "'"
    CONFIG.fmtSQuoteClose = "'"
    CONFIG.fmtDQuoteOpen = '"'
    CONFIG.fmtDQuoteClose = '"'

    # Build the pattern with dialogue style 3 and open dialogue disabled
    CONFIG.dialogStyle = 3
    CONFIG.allowOpenDial = False
    pattern = REGEX_PATTERNS.dialogStyle
    assert pattern is not None

    # Double quotes
    # =============

    # A single quoted segment
    found = allMatches(pattern, 'one "two" three')
    assert found == [[('"two"', 4, 9)]]

    # Two separate quoted segments
    found = allMatches(pattern, 'one "two" three "four" five')
    assert found == [
        [('"two"', 4, 9)],
        [('"four"', 16, 22)],
    ]

    # A quote symbol touching a word on its outer side must not match
    for text in ('one"two" three', 'one "two"three', 'one"two"three'):
        assert allMatches(pattern, text) == []

    # Single quotes
    # =============

    # A single quoted segment
    found = allMatches(pattern, "one 'two' three")
    assert found == [[("'two'", 4, 9)]]

    # Two separate quoted segments
    found = allMatches(pattern, "one 'two' three 'four' five")
    assert found == [
        [("'two'", 4, 9)],
        [("'four'", 16, 22)],
    ]

    # A quote symbol touching a word on its outer side must not match
    for text in ("one'two' three", "one 'two'three", "one'two'three"):
        assert allMatches(pattern, text) == []

    # An apostrophe inside the quoted text must not end the match early
    found = allMatches(pattern, "one 'two's three', 'four' five")
    assert found == [
        [("'two's three'", 4, 17)],
        [("'four'", 19, 25)],
    ]


@pytest.mark.core
def testTextPatterns_DialogueSpecial():
"""Test the special dialogue style pattern regexes."""
Expand Down

0 comments on commit f702155

Please sign in to comment.