Skip to content

Commit

Permalink
improve getMessageHTML signature, improve telegram message crop
Browse files Browse the repository at this point in the history
Previously, the Telegram message was always stripped of its HTML,
which was unintended. After this change, it is stripped of HTML
formatting only if the character limit is exceeded. Keeping the
formatting in that case would require sophisticated HTML parsing
to find the tag that would be left without its closing part
when trimming the message.
  • Loading branch information
paskal committed Mar 25, 2022
1 parent 11db6ab commit 37f5979
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
33 changes: 20 additions & 13 deletions app/proc/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func (client TelegramClient) Send(channelID string, item feed.Item) (err error)
func (client TelegramClient) sendText(channelID string, item feed.Item) (*tb.Message, error) {
message, err := client.Bot.Send(
recipient{chatID: channelID},
client.getMessageHTML(item, true, false),
client.getMessageHTML(item, htmlMessageParams{WithMp3Link: true}),
tb.ModeHTML,
tb.NoPreview,
)
Expand All @@ -97,7 +97,7 @@ func (client TelegramClient) sendAudio(channelID string, item feed.Item) (*tb.Me
File: tb.FromReader(&httpBodyCopy),
FileName: item.GetFilename(),
MIME: "audio/mpeg",
Caption: client.getMessageHTML(item, false, true),
Caption: client.getMessageHTML(item, htmlMessageParams{TrimCaption: true}),
Title: item.Title,
Performer: item.Author,
Duration: client.duration(tee),
Expand All @@ -121,20 +121,19 @@ func (client TelegramClient) tagLinkOnlySupport(htmlText string) string {
return html.UnescapeString(p.Sanitize(htmlText))
}

// htmlMessageParams groups the optional switches accepted by getMessageHTML.
type htmlMessageParams struct {
	// WithMp3Link appends the item's enclosure URL to the end of the message.
	WithMp3Link bool
	// TrimCaption shortens the description when the assembled message
	// is longer than the 1024 symbol caption limit.
	TrimCaption bool
}

// getMessageHTML generates HTML message from provided feed.Item
func (client TelegramClient) getMessageHTML(item feed.Item, withMp3Link, trimCaption bool) string {
func (client TelegramClient) getMessageHTML(item feed.Item, params htmlMessageParams) string {
var header, footer string
title := strings.TrimSpace(item.Title)
if title != "" {
switch {
case item.Link == "":
header = fmt.Sprintf("%s\n\n", title)
case item.Link != "":
header = fmt.Sprintf("<a href=%q>%s</a>\n\n", item.Link, title)
}
if title != "" && item.Link == "" {
header = fmt.Sprintf("%s\n\n", title)
} else if title != "" && item.Link != "" {
header = fmt.Sprintf("<a href=%q>%s</a>\n\n", item.Link, title)
}

if withMp3Link {
if params.WithMp3Link {
footer += fmt.Sprintf("\n\n%s", item.Enclosure.URL)
}

Expand All @@ -146,8 +145,8 @@ func (client TelegramClient) getMessageHTML(item feed.Item, withMp3Link, trimCap
description = strings.TrimSpace(description)

// https://limits.tginfo.me/en 1024 symbol limit for caption
if trimCaption && len(header+description+footer) > 1024 {
description = CleanText(description, 1020-len(header+footer))
if params.TrimCaption && len(header+description+footer) > 1024 {
description = CropText(description, 1024-len(header+footer))
}

return header + description + footer
Expand Down Expand Up @@ -181,3 +180,11 @@ func (r recipient) Recipient() string {

return r.chatID
}

// CropText returns inp untouched while its length in runes fits into max;
// a longer string is handed to CleanText together with the same limit, which
// is expected to strip the HTML tags and shorten it.
func CropText(inp string, max int) string {
	fits := len([]rune(inp)) <= max
	if fits {
		return inp
	}
	return CleanText(inp, max)
}
12 changes: 9 additions & 3 deletions app/proc/telegram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,21 @@ func TestFormattedMessage(t *testing.T) {
i := i
tc := tc
t.Run(strconv.Itoa(i), func(t *testing.T) {
htmlMessage := client.getMessageHTML(tc.item, false, false)
htmlMessage := client.getMessageHTML(tc.item, htmlMessageParams{})
assert.Equal(t, tc.expectedHTML, htmlMessage)
})
}
}

func TestTruncatedMessage(t *testing.T) {
client := TelegramClient{}
htmlMessage := client.getMessageHTML(feed.Item{Title: "title", Enclosure: feed.Enclosure{URL: "https://example.com/some.mp3"}, Description: template.HTML(strings.Repeat("test", 1000))}, true, true) //nolint:gosec // no problem to have it in the test
htmlMessage := client.getMessageHTML(
feed.Item{
Title: "title",
Enclosure: feed.Enclosure{URL: "https://example.com/some.mp3"},
Description: template.HTML(strings.Repeat("test", 1000)), //nolint:gosec // test case, no security issues
},
htmlMessageParams{WithMp3Link: true, TrimCaption: true})
assert.True(t, strings.HasPrefix(htmlMessage, "title\n\n"))
assert.True(t, strings.HasSuffix(htmlMessage, "\n\nhttps://example.com/some.mp3"))
assert.LessOrEqual(t, len(htmlMessage), 1024)
Expand All @@ -155,7 +161,7 @@ func TestGetMessageHTML(t *testing.T) {
expected := "<a href=\"https://example.com/xyz\">Podcast</a>\n\nNews <a href=\"/test\">Podcast Link</a>\n\nhttps://example.com"

client := TelegramClient{}
msg := client.getMessageHTML(item, true, false)
msg := client.getMessageHTML(item, htmlMessageParams{WithMp3Link: true})
assert.Equal(t, expected, msg)
}

Expand Down

0 comments on commit 37f5979

Please sign in to comment.