-
-
Notifications
You must be signed in to change notification settings - Fork 19
/
feeditem.go
113 lines (96 loc) · 2.38 KB
/
feeditem.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
// Package withstate provides a simple wrapper of the gofeed.Item, which
// allows simple tracking of the seen vs. unseen (new vs. old) state of
// an RSS feed's entry.
//
// State for a feed-item is stored upon the local filesystem.
package withstate
import (
	"fmt"
	"html"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"github.com/mmcdole/gofeed"
)
// FeedItem is a structure wrapping a gofeed.Item, to allow us to record
// state and to attach extra, locally-defined information to a feed entry.
type FeedItem struct {
// Item is the wrapped feed entry. It is embedded by pointer, so the
// methods in this file reach it as item.Item (e.g. item.Item.Content,
// item.Item.Description, item.Item.Link).
*gofeed.Item
// Tag is a field that can be set for this feed item,
// inside our configuration file.
Tag string
}
// RawContent returns the unprocessed body of the feed item.
//
// The Content field of the wrapped item is preferred. When it is empty
// the Description field is returned instead, which may itself be empty.
func (item *FeedItem) RawContent() string {
	if body := item.Item.Content; body != "" {
		return body
	}

	// Nothing in Content: fall back to the description, whatever it holds.
	return item.Item.Description
}
// HTMLContent returns the item's content as HTML that has been cleaned up
// for display outside of the originating site.
//
// The raw content (see RawContent) is parsed as HTML and then:
//
//   - every <a href> and <img src> that is not already a data:, http:// or
//     https:// reference is rewritten via patchReference, using the item's
//     link as the base;
//   - the "loading" and "srcset" attributes are removed from <img> elements;
//   - every <iframe> is replaced by a plain link to its source, or dropped
//     entirely when it has no source;
//   - every <script> element is removed.
//
// The rendered document is returned. If the content cannot be parsed the
// raw content is returned together with the parse error.
func (item *FeedItem) HTMLContent() (string, error) {
	rawContent := item.RawContent()
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(rawContent))
	if err != nil {
		return rawContent, err
	}

	// resolve leaves self-contained references alone and makes everything
	// else absolute, relative to the item's link.
	resolve := func(ref string) string {
		switch {
		case strings.HasPrefix(ref, "data:"),
			strings.HasPrefix(ref, "http://"),
			strings.HasPrefix(ref, "https://"):
			return ref
		default:
			return item.patchReference(ref)
		}
	}

	doc.Find("a, img").Each(func(_ int, e *goquery.Selection) {
		var attr string
		switch e.Get(0).Data {
		case "a":
			attr = "href"
		case "img":
			attr = "src"
			// These attributes are dropped so that only the (patched)
			// "src" attribute determines which image is loaded.
			e.RemoveAttr("loading")
			e.RemoveAttr("srcset")
		}

		// A missing attribute reads as "", which is treated the same as
		// an empty one: there is nothing to patch.
		ref, _ := e.Attr(attr)
		if ref == "" {
			return
		}
		e.SetAttr(attr, resolve(ref))
	})

	doc.Find("iframe").Each(func(_ int, iframe *goquery.Selection) {
		src, _ := iframe.Attr("src")
		if src == "" {
			iframe.Remove()
			return
		}

		// The source comes from the feed and is therefore untrusted: it
		// must be escaped before being spliced into markup, otherwise a
		// quote or angle bracket in it could inject attributes or elements.
		link := html.EscapeString(resolve(src))
		iframe.ReplaceWithHtml(fmt.Sprintf(`<a href="%s">%s</a>`, link, link))
	})

	doc.Find("script").Remove()

	return doc.Html()
}
// patchReference turns a relative reference found in the item's content
// into an absolute URL, using the item's own link as the base.
//
// Resolution follows RFC 3986 (via url.URL.ResolveReference), so
// path-relative references ("img/a.png", "../a.png"), fragment-only
// references ("#top") and scheme-relative references ("//host/a.png") are
// resolved against the complete item link rather than only its scheme and
// host.
//
// The reference is returned unchanged when either it or the item link
// cannot be parsed. When the item link carries neither a scheme nor a host
// there is nothing to resolve against, and the reference is returned
// re-serialised but otherwise untouched.
func (item *FeedItem) patchReference(ref string) string {
	resURL, err := url.Parse(ref)
	if err != nil {
		return ref
	}

	itemURL, err := url.Parse(item.Item.Link)
	if err != nil {
		return ref
	}

	// A base without scheme and host cannot make anything absolute;
	// resolving against it would only root the path at "/".
	if itemURL.Scheme == "" && itemURL.Host == "" {
		return resURL.String()
	}

	return itemURL.ResolveReference(resURL).String()
}