-
Notifications
You must be signed in to change notification settings - Fork 1
/
rivet.go
140 lines (116 loc) · 3.25 KB
/
rivet.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Copyright 2021 Wayback Archiver. All rights reserved.
// Use of this source code is governed by the MIT
// license that can be found in the LICENSE file.
package rivet
import (
"bytes"
"context"
"io"
"io/ioutil"
"net/http"
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/go-shiori/obelisk"
"github.com/kennygrant/sanitize"
"github.com/pkg/errors"
"github.com/wabarc/rivet/ipfs"
)
// Shaft represents the rivet handler. It carries the HTTP and IPFS pinning
// configuration used by Wayback.
type Shaft struct {
// Client represents a http client. Wayback only takes its Transport and
// hands it to the archiver; a nil Client leaves the archiver's transport
// untouched.
Client *http.Client
// Hold specifies which IPFS mode to pin data through. Its Mode field
// selects between local (ipfs.Local) and remote (ipfs.Remote) pinning.
Hold ipfs.Pinning
// Next is a fallback pinning service. If the `Hold`
// pinning service fails (returns an error), it will be used.
Next ipfs.Pinning
// Do not store file on any IPFS node, just archive. When set, Wayback
// writes the archived page to a local .html file and returns its name
// instead of an IPFS gateway URL.
ArchiveOnly bool
}
// Wayback archives the webpage at input into a single HTML document and,
// unless ArchiveOnly is set, pins the result to IPFS.
//
// The page is archived with obelisk. If ctx carries a page injected through
// WithInput, it is passed to obelisk as the request input. When s.Client is
// non-nil, its Transport is used by the archiver.
//
// With ArchiveOnly set, the document is written to "<name>.html" (a relative
// path, where name is built from the sanitized host and path of input) and
// that file name is returned. Otherwise the document is written as index.html
// inside a temporary directory, the directory is pinned through s.Hold
// (falling back to s.Next when that returns an error), and the gateway URL
// "https://ipfs.io/ipfs/<cid>" is returned. The temporary directory is removed
// before Wayback returns.
//
// An error is returned when input is nil, the temporary directory or index
// file cannot be created, archiving fails, both pinning attempts fail, or the
// resulting cid is empty.
func (s *Shaft) Wayback(ctx context.Context, input *url.URL) (cid string, err error) {
	// Leave room for the "-" separator and the random suffix appended by
	// ioutil.TempDir, so the final directory name stays within the 255-byte
	// limit that most filesystems put on a single path component.
	const maxPrefixLen = 240

	if input == nil {
		return "", errors.New("input url is nil")
	}

	name := sanitize.BaseName(input.Host) + sanitize.BaseName(input.Path)
	prefix := "rivet-" + name
	if len(prefix) > maxPrefixLen {
		prefix = prefix[:maxPrefixLen]
	}

	dir, err := ioutil.TempDir(os.TempDir(), prefix+"-")
	if err != nil {
		// Report the requested prefix: TempDir returns an empty path on
		// failure, so the returned value carries no information.
		return "", errors.Wrap(err, "create temp directory failed: "+prefix)
	}
	defer os.RemoveAll(dir)

	uri := input.String()
	req := obelisk.Request{URL: uri, Input: inputFromContext(ctx)}
	arc := &obelisk.Archiver{
		DisableJS:            isDisableJS(uri),
		SkipResourceURLError: true,
		WrapDirectory:        dir,
		RequestTimeout:       3 * time.Second,
	}
	if s.Client != nil {
		arc.Transport = s.Client.Transport
	}
	arc.Validate()

	content, _, err := arc.Archive(ctx, req)
	if err != nil {
		return "", errors.Wrap(err, "archive failed")
	}

	// For auto indexing in IPFS, the filename should be index.html.
	indexFile := filepath.Join(dir, "index.html")
	if s.ArchiveOnly {
		// Archive-only output must outlive the temporary directory, so it is
		// written next to the caller instead.
		indexFile = name + ".html"
	}
	if err := ioutil.WriteFile(indexFile, content, 0600); err != nil {
		return "", errors.Wrap(err, "create index file failed")
	}
	if s.ArchiveOnly {
		return indexFile, nil
	}

	switch s.Hold.Mode {
	case ipfs.Local:
		cid, err = (&ipfs.Locally{Pinning: s.Hold}).PinDir(dir)
	case ipfs.Remote:
		cid, err = (&ipfs.Remotely{Pinning: s.Hold}).PinDir(dir)
	}
	if err != nil {
		// Try fallback pinning service. If Next has no recognised mode the
		// primary error is kept and reported below.
		switch s.Next.Mode {
		case ipfs.Local:
			cid, err = (&ipfs.Locally{Pinning: s.Next}).PinDir(dir)
		case ipfs.Remote:
			cid, err = (&ipfs.Remotely{Pinning: s.Next}).PinDir(dir)
		}
		if err != nil {
			return "", errors.Wrap(err, "pin failed")
		}
	}
	if cid == "" {
		return "", errors.New("cid empty")
	}

	return "https://ipfs.io/ipfs/" + cid, nil
}
// ctxKeyInput is the private context key type under which WithInput stores
// the injected webpage bytes; inputFromContext looks the value up with it.
type ctxKeyInput struct{}
// WithInput returns a copy of ctx that carries input, the raw bytes of a
// webpage, stored under the package-private ctxKeyInput key.
func (s *Shaft) WithInput(ctx context.Context, input []byte) (c context.Context) {
	key := ctxKeyInput{}
	c = context.WithValue(ctx, key, input)
	return c
}
// inputFromContext returns a reader over the webpage bytes stored in ctx
// under ctxKeyInput. It returns nil when ctx holds no []byte value for
// that key.
func inputFromContext(ctx context.Context) io.Reader {
	page, found := ctx.Value(ctxKeyInput{}).([]byte)
	if !found {
		return nil
	}
	return bytes.NewReader(page)
}
// isDisableJS reports whether JavaScript should be disabled when archiving
// link.
//
// The decision is driven by the DISABLEJS_URIS environment variable, a
// "|"-separated list of literal URI fragments, e.g.
//
//	DISABLEJS_URIS=wikipedia.org|eff.org/tags
//
// It returns true when link contains at least one of the fragments. Fragments
// are matched literally (regexp metacharacters are quoted) and
// case-sensitively. Surrounding whitespace is trimmed and empty fragments are
// ignored, so a stray leading, trailing or doubled "|" does not turn into a
// pattern that matches every link. It returns false when the variable is
// unset, holds no usable fragment, or cannot be compiled.
func isDisableJS(link string) bool {
	uris := os.Getenv("DISABLEJS_URIS")
	if uris == "" {
		return false
	}

	// Quote each fragment on its own so that only the separators become
	// regexp alternation; no placeholder substitution is needed.
	var patterns []string
	for _, entry := range strings.Split(uris, "|") {
		entry = strings.TrimSpace(entry)
		if entry == "" {
			continue
		}
		patterns = append(patterns, regexp.QuoteMeta(entry))
	}
	if len(patterns) == 0 {
		return false
	}

	// Compile instead of MustCompile: a malformed environment value must not
	// panic the caller.
	re, err := regexp.Compile(strings.Join(patterns, "|"))
	if err != nil {
		return false
	}
	return re.MatchString(link)
}