From f0a49e6aacc86ca2f11fd672a9bd4920f1cb5fdc Mon Sep 17 00:00:00 2001 From: Bernhard B Date: Sat, 21 Dec 2024 22:10:35 +0100 Subject: [PATCH] added new TextstyleParser implementation * the old implementation was cumbersome to maintain and had some problems with nested formatting. see #630 --- Dockerfile | 2 +- src/client/client.go | 3 +- src/utils/textstyleparser.go | 224 +++++++++++++++++------------- src/utils/textstyleparser_test.go | 55 ++++++-- 4 files changed, 168 insertions(+), 116 deletions(-) diff --git a/Dockerfile b/Dockerfile index c739ee7..d7d6460 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,7 @@ ARG GRAALVM_VERSION=21.0.0 ARG BUILD_VERSION_ARG=unset -FROM golang:1.22-bookworm AS buildcontainer +FROM golang:1.23-bookworm AS buildcontainer ARG SIGNAL_CLI_VERSION ARG LIBSIGNAL_CLIENT_VERSION diff --git a/src/client/client.go b/src/client/client.go index a904727..e8111b2 100644 --- a/src/client/client.go +++ b/src/client/client.go @@ -398,7 +398,8 @@ func (s *SignalClient) send(signalCliSendRequest ds.SignalCliSendRequest) (*Send signalCliTextFormatStrings := []string{} if signalCliSendRequest.TextMode != nil && *signalCliSendRequest.TextMode == "styled" { - signalCliSendRequest.Message, signalCliTextFormatStrings = utils.ParseMarkdownMessage(signalCliSendRequest.Message) + textstyleParser := utils.NewTextstyleParser(signalCliSendRequest.Message) + signalCliSendRequest.Message, signalCliTextFormatStrings = textstyleParser.Parse() } var groupId string = "" diff --git a/src/utils/textstyleparser.go b/src/utils/textstyleparser.go index e368bba..6abd348 100644 --- a/src/utils/textstyleparser.go +++ b/src/utils/textstyleparser.go @@ -2,6 +2,8 @@ package utils import ( "strconv" + "unicode/utf16" + "unicode/utf8" ) const ( @@ -18,119 +20,141 @@ const ( ItalicBegin = 1 ItalicEnd = 2 BoldBegin = 3 - BoldEnd1 = 4 - BoldEnd2 = 5 MonoSpaceBegin = 6 - MonoSpaceEnd = 7 StrikethroughBegin = 8 - StrikethroughEnd = 9 - SpoilerBegin1 = 10 - SpoilerBegin = 11 - SpoilerEnd1 = 12 - SpoilerEnd2 = 13 + SpoilerBegin = 9 ) -func getUtf16CharacterCount(s string) int { - stringLength := len(s) - if stringLength == 1 { - return 1 +func getUtf16StringLength(s string) int { + runes := []rune(s) //turn string to slice + + length := 0 + for _, r := range runes { + length += utf16.RuneLen(r) } - return stringLength / 2 + return length +} + +type TokenState struct { + BeginPos int + Token int +} + +type Stack []TokenState + +func (s *Stack) Push(v TokenState) { + *s = append(*s, v) +} + +func (s *Stack) Pop() TokenState { + ret := (*s)[len(*s)-1] + *s = (*s)[0 : len(*s)-1] + + return ret } -func getAdditionalCharacterCount(characterCount int) int { - additionalCharacterCount := characterCount - 1 - if additionalCharacterCount > 0 { - return additionalCharacterCount +func (s *Stack) Peek() TokenState { + ret := (*s)[len(*s)-1] + return ret +} + +func (s *Stack) Empty() bool { + if len(*s) == 0 { + return true } - return 0 + return false } -func ParseMarkdownMessage(message string) (string, []string) { - textFormat := Normal - textFormatBegin := 0 - textFormatLength := 0 - numOfControlChars := 0 - state := None - signalCliFormatStrings := []string{} - fullString := "" - lastChar := "" - additionalCharacterCount := 0 - - runes := []rune(message) //turn string to slice - - for i, v := range runes { //iterate through rune - if v == '*' { - if state == ItalicBegin { - if lastChar == "*" { - state = BoldBegin - textFormat = Bold - textFormatBegin = i - numOfControlChars + additionalCharacterCount - textFormatLength = 0 - additionalCharacterCount = 0 - } else { - state = ItalicEnd - } - } else if state == None { - state = ItalicBegin - textFormat = Italic - textFormatBegin = i - numOfControlChars + additionalCharacterCount - textFormatLength = 0 - } else if state == BoldBegin { - state = BoldEnd1 - } else if state == BoldEnd1 { - state = BoldEnd2 - } - numOfControlChars += 1 - } else if v == '|' { - if state == None { - state = SpoilerBegin1 - } else if state == SpoilerBegin1 && lastChar == "|" { - state = SpoilerBegin - textFormat = Spoiler - textFormatBegin = i - numOfControlChars + additionalCharacterCount - textFormatLength = 0 - } else if state == SpoilerBegin { - state = SpoilerEnd1 - } else if state == SpoilerEnd1 && lastChar == "|" { - state = SpoilerEnd2 - } - numOfControlChars += 1 - } else if v == '`' { - if state == None { - state = MonoSpaceBegin - textFormat = Monospace - textFormatBegin = i - numOfControlChars + additionalCharacterCount - textFormatLength = 0 - } else if state == MonoSpaceBegin { - state = MonoSpaceEnd - } - numOfControlChars += 1 - } else if v == '~' { - if state == None { - state = StrikethroughBegin - textFormat = Strikethrough - textFormatBegin = i - numOfControlChars + additionalCharacterCount - textFormatLength = 0 - } else if state == StrikethroughBegin { - state = StrikethroughEnd - } - numOfControlChars += 1 +const eof = -1 + +type TextstyleParser struct { + input string + pos int + width int + tokens Stack + fullString string + signalCliFormatStrings []string + //numOfControlTokens int +} + +func NewTextstyleParser(input string) *TextstyleParser { + return &TextstyleParser{ + input: input, + pos: 0, + width: 0, + tokens: make(Stack, 0), + fullString: "", + signalCliFormatStrings: []string{}, + } +} + +func (l *TextstyleParser) next() (rune rune) { + if l.pos >= len(l.input) { + l.width = 0 + return eof + } + //r := []rune(l.input[l.pos:])[0] + //l.width = utf16.RuneLen(r) + //l.pos += l.width + rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) + l.pos += l.width + return rune +} + +// backup steps back one rune. +// Can be called only once per call of next. +func (l *TextstyleParser) backup() { + l.pos -= l.width +} + +// peek returns but does not consume +// the next rune in the input. +func (l *TextstyleParser) peek() rune { + rune := l.next() + l.backup() + return rune +} + +func (l *TextstyleParser) handleToken(tokenType int, signalCliStylingType string) { + if l.tokens.Empty() { + l.tokens.Push(TokenState{BeginPos: getUtf16StringLength(l.fullString), Token: tokenType}) + } else { + if l.tokens.Peek().Token == tokenType { + tokenBeginState := l.tokens.Pop() + l.signalCliFormatStrings = append(l.signalCliFormatStrings, strconv.Itoa(tokenBeginState.BeginPos)+":"+strconv.Itoa(getUtf16StringLength(l.fullString)-tokenBeginState.BeginPos)+":"+signalCliStylingType) } else { - textFormatLength += 1 - fullString += string(v) - additionalCharacterCount += getAdditionalCharacterCount(getUtf16CharacterCount(string(v))) + l.tokens.Push(TokenState{BeginPos: getUtf16StringLength(l.fullString), Token: tokenType}) + } + } +} + +func (l *TextstyleParser) Parse() (string, []string) { + for { + c := l.next() + if c == eof { + break } - lastChar = string(v) - - if state == ItalicEnd || state == BoldEnd2 || state == MonoSpaceEnd || state == StrikethroughEnd || state == SpoilerEnd2 { - signalCliFormatStrings = append(signalCliFormatStrings, strconv.Itoa(textFormatBegin)+":"+strconv.Itoa(textFormatLength+additionalCharacterCount)+":"+textFormat) - state = None - textFormatBegin = 0 - textFormatLength = 0 - textFormat = Normal + + nextRune := l.peek() + + if c == '*' { + if nextRune == '*' { //Bold + l.next() + l.handleToken(BoldBegin, Bold) + } else { //Italic + l.handleToken(ItalicBegin, Italic) + } + } else if (c == '|') && (nextRune == '|') { + l.next() + l.handleToken(SpoilerBegin, Spoiler) + } else if c == '~' { + l.handleToken(StrikethroughBegin, Strikethrough) + } else if c == '`' { + l.handleToken(MonoSpaceBegin, Monospace) + } else { + l.fullString += string(c) } } - return fullString, signalCliFormatStrings + return l.fullString, l.signalCliFormatStrings } diff --git a/src/utils/textstyleparser_test.go b/src/utils/textstyleparser_test.go index 9aceb39..ea0055c 100644 --- a/src/utils/textstyleparser_test.go +++ b/src/utils/textstyleparser_test.go @@ -15,80 +15,107 @@ func expectFormatStringsEqual(t *testing.T, formatStrings1 []string, formatStrin } } -func TestSimpleMessage1(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("*italic*") +func TestSimpleItalicMessage(t *testing.T) { + textstyleParser := NewTextstyleParser("*italic*") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "italic") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:6:ITALIC"}) } +func TestSimpleBoldMessage(t *testing.T) { + textstyleParser := NewTextstyleParser("**bold**") + message, signalCliFormatStrings := textstyleParser.Parse() + expectMessageEqual(t, message, "bold") + expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:4:BOLD"}) +} + func TestSimpleMessage(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("*This is a italic message*") + textstyleParser := NewTextstyleParser("*This is a italic message*") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "This is a italic message") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:24:ITALIC"}) } func TestBoldAndItalicMessage(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("This is a **bold** and *italic* message") + textstyleParser := NewTextstyleParser("This is a **bold** and *italic* message") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "This is a bold and italic message") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:4:BOLD", "19:6:ITALIC"}) } func TestTwoBoldFormattedStrings(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("This is a **bold** and another **bold** message") + textstyleParser := NewTextstyleParser("This is a **bold** and another **bold** message") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "This is a bold and another bold message") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:4:BOLD", "27:4:BOLD"}) } func TestStrikethrough(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("This is a ~strikethrough~ and a **bold** message") + textstyleParser := NewTextstyleParser("This is a ~strikethrough~ and a **bold** message") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "This is a strikethrough and a bold message") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:13:STRIKETHROUGH", "30:4:BOLD"}) } func TestMonospace(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("This is a `monospace` and a **bold** message") + textstyleParser := NewTextstyleParser("This is a `monospace` and a **bold** message") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "This is a monospace and a bold message") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"10:9:MONOSPACE", "26:4:BOLD"}) } func TestMulticharacterEmoji(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("👋abcdefg") + textstyleParser := NewTextstyleParser("👋abcdefg") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "👋abcdefg") expectFormatStringsEqual(t, signalCliFormatStrings, []string{}) } func TestMulticharacterEmojiWithBoldText(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("👋**abcdefg**") + textstyleParser := NewTextstyleParser("👋**abcdefg**") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "👋abcdefg") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"2:7:BOLD"}) } func TestMultipleMulticharacterEmoji(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("👋🏾abcdefg") + textstyleParser := NewTextstyleParser("👋🏾abcdefg") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "👋🏾abcdefg") expectFormatStringsEqual(t, signalCliFormatStrings, []string{}) } func TestMultipleMulticharacterEmojiWithBoldText(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("👋🏾**abcdefg**") + textstyleParser := NewTextstyleParser("👋🏾**abcdefg**") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "👋🏾abcdefg") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"4:7:BOLD"}) } func TestMulticharacterEmojiWithBoldText2(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("Test 👦🏿 via **signal** API") + textstyleParser := NewTextstyleParser("Test 👦🏿 via **signal** API") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "Test 👦🏿 via signal API") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"14:6:BOLD"}) } func TestSpoiler(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("||this is a spoiler||") + textstyleParser := NewTextstyleParser("||this is a spoiler||") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "this is a spoiler") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:17:SPOILER"}) } func TestSpoiler1(t *testing.T) { - message, signalCliFormatStrings := ParseMarkdownMessage("||this is a spoiler|| and another ||spoiler||") + textstyleParser := NewTextstyleParser("||this is a spoiler|| and another ||spoiler||") + message, signalCliFormatStrings := textstyleParser.Parse() expectMessageEqual(t, message, "this is a spoiler and another spoiler") expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:17:SPOILER", "30:7:SPOILER"}) } + +func TestBoldTextInsideSpoiler(t *testing.T) { + textstyleParser := NewTextstyleParser("||**this is a bold text inside a spoiler**||") + message, signalCliFormatStrings := textstyleParser.Parse() + expectMessageEqual(t, message, "this is a bold text inside a spoiler") + expectFormatStringsEqual(t, signalCliFormatStrings, []string{"0:36:BOLD", "0:36:SPOILER"}) +}