forked from jlelse/GoBlog
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck.go
147 lines (141 loc) · 3.64 KB
/
check.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
package main
import (
"context"
"fmt"
"io"
"log"
"net/http"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/klauspost/compress/gzhttp"
"github.com/samber/lo"
"go.goblog.app/app/pkgs/bufferpool"
"golang.org/x/sync/singleflight"
)
func (a *goBlog) checkAllExternalLinks() {
posts, err := a.getPosts(&postsRequestConfig{
status: []postStatus{statusPublished},
visibility: []postVisibility{visibilityPublic, visibilityUnlisted},
withoutParameters: true,
})
if err != nil {
log.Println(err.Error())
return
}
_ = a.checkLinks(log.Writer(), posts...)
}
func (a *goBlog) checkLinks(w io.Writer, posts ...*post) error {
// Get all links
allLinks, err := a.allLinks(posts...)
if err != nil {
return err
}
fmt.Fprintln(w, "Checking", len(allLinks), "links")
// Cancel context
var canceled, finished atomic.Value
canceled.Store(false)
finished.Store(false)
cancelContext, cancelFunc := context.WithCancel(context.Background())
a.shutdown.Add(func() {
if finished.Load().(bool) {
return
}
canceled.Store(true)
cancelFunc()
fmt.Fprintln(w, "Canceled link check")
})
// Create HTTP client
client := &http.Client{
Timeout: 30 * time.Second,
Transport: gzhttp.Transport(&http.Transport{
// Limits
DisableKeepAlives: true,
MaxConnsPerHost: 1,
}),
}
// Process all links
var wg sync.WaitGroup
var sm sync.Map
var sg singleflight.Group
con := make(chan bool, 5)
for _, l := range allLinks {
con <- true // This waits until there's space in the buffered channel
// Check if check is canceled
if canceled.Load().(bool) {
break
}
// Increase wait group
wg.Add(1)
// Start link check
go func(link *stringPair) {
defer func() {
<-con // Read from channel, to let next routine execute
wg.Done()
}()
// Check if link is internal
if strings.HasPrefix(link.Second, a.cfg.Server.PublicAddress) {
return
}
// Process link
r, err, _ := sg.Do(link.Second, func() (any, error) {
// Check if already cached
if mr, ok := sm.Load(link.Second); ok {
return mr, nil
}
// Do request
req, err := http.NewRequestWithContext(cancelContext, http.MethodGet, link.Second, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
req.Header.Set("Accept-Language", "en-US,en;q=0.5")
resp, err := client.Do(req)
if err != nil {
return nil, err
}
_ = resp.Body.Close()
// Cache result
sm.Store(link.Second, resp.StatusCode)
// Return result
return resp.StatusCode, nil
})
// Check error
if err != nil {
if !strings.Contains(err.Error(), "context canceled") {
fmt.Fprintln(w, "Error:", link.Second, err.Error())
}
return
}
// Check status code
if statusCode := r.(int); !successStatus(statusCode) {
fmt.Fprintln(w, link.Second, "in", link.First, statusCode, http.StatusText(statusCode))
}
}(l)
}
// Wait for all links to finish
wg.Wait()
// Finish
finished.Store(true)
return nil
}
func (a *goBlog) allLinks(posts ...*post) (allLinks []*stringPair, err error) {
for _, p := range posts {
contentBuf := bufferpool.Get()
a.postHtmlToWriter(contentBuf, p, true)
links, err := allLinksFromHTML(contentBuf, a.fullPostURL(p))
bufferpool.Put(contentBuf)
if err != nil {
return nil, err
}
allLinks = lo.Map(links, func(s string, _ int) *stringPair {
return &stringPair{a.fullPostURL(p), s}
})
}
return allLinks, nil
}
func successStatus(status int) bool {
return status >= 200 && status < 400
}