-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgis.go
248 lines (216 loc) · 6.54 KB
/
gis.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
package gis
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"io"
"io/ioutil"
"log"
"math/rand"
"mime/multipart"
"net/http"
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
"time"
)
var (
	// pattern matches an inline image embedded in the result page. Group 1 is
	// the text between "data:image/" and ";base64," (the image type), group 2
	// is the base64 payload up to the terminating "';".
	pattern = regexp.MustCompile(`data:image/(.*?);base64,(.*?)';`)

	// wg is the package-wide wait group shared by all goroutines started
	// while walking the upload directory.
	wg sync.WaitGroup
)

// Default directory names used when no option overrides them.
const (
	DEFUALT_DOWNLOAD_PATH = "download"
	DEFUALT_UPLOAD_PATH   = "upload"
)

var (
	// DEFUALT_USER_AGENTS is the default pool of User-Agent header values.
	DEFUALT_USER_AGENTS = []string{
		`Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50`,
		`Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36 Edg/85.0.564.51`,
		`Mozilla/5.0 (X11; U; Linux x86_64; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Version/5.0 Safari/531.2`,
		`Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36 OPR/48.0.2685.52`,
		`Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.49 Safari/537.36 OPR/48.0.2685.7`,
		`Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36 Edg/85.0.564.44`,
		`Mozilla/5.0 (X11; U; FreeBSD i386; zh-tw; rv:31.0) Gecko/20100101 Opera/13.0`,
		`Mozilla/5.0 (X11; CrOS armv7l 9592.96.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.114 Safari/537.36`,
	}

	// DEFAULT_URL is the default endpoint that images are uploaded to.
	DEFAULT_URL = &url.URL{
		Scheme: "https",
		Host:   "www.google.com",
		Path:   "/searchbyimage/upload",
	}
)
// init seeds the global math/rand source with the current time in
// nanoseconds.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}
// Searcher uploads local images to a search-by-image endpoint and stores the
// images found in the response. Field order matters to any positional
// composite literal of this type.
type Searcher struct {
maxRetryTimes int // maximum number of attempts
url *url.URL // endpoint (mirror) address
log *log.Logger // logger
client *http.Client // HTTP client used to send requests
userAgents []string // User-Agent values for the request header
upload string // path of the images to upload
download string // path where downloaded images are stored
}
// NewSearcher returns a Searcher initialised with the package defaults
// (3 attempts, DEFAULT_URL, DEFUALT_USER_AGENTS, DEFUALT_UPLOAD_PATH and
// DEFUALT_DOWNLOAD_PATH) and then applies the given options in order.
//
// The HTTP client and the logger are filled in lazily, after the options have
// run, and only when no option supplied one. The fallback logger writes to
// os.Stderr with the standard flags: a zero-value log.Logger has no output
// writer, so it cannot be used as a default.
func NewSearcher(options ...Option) *Searcher {
	s := &Searcher{
		maxRetryTimes: 3,
		url:           DEFAULT_URL,
		userAgents:    DEFUALT_USER_AGENTS,
		upload:        DEFUALT_UPLOAD_PATH,
		download:      DEFUALT_DOWNLOAD_PATH,
	}
	for _, option := range options {
		option.apply(s)
	}
	// Lazy initialisation: do not overwrite what an option may have set.
	if s.client == nil {
		s.client = &http.Client{}
	}
	if s.log == nil {
		s.log = log.New(os.Stderr, "", log.LstdFlags)
	}
	return s
}
// SendRequest executes req with the Searcher's HTTP client and returns the
// whole response body. The response status code is not inspected. On a
// transport or read error the returned slice is nil.
func (s *Searcher) SendRequest(req *http.Request) ([]byte, error) {
	resp, doErr := s.client.Do(req)
	if doErr != nil {
		return nil, doErr
	}
	defer resp.Body.Close()

	// Read the complete response into memory.
	body, readErr := ioutil.ReadAll(resp.Body)
	if readErr != nil {
		return nil, readErr
	}
	return body, nil
}
// buildRequest creates the multipart/form-data POST request that uploads the
// image file at path image to s.url.
//
// The form contains the empty fields image_url, image_content and filename,
// the field hl=en, and the file contents in the "encoded_image" part. The
// Host, Origin and Content-Type headers are set from s.url and the multipart
// writer; a User-Agent is picked at random from s.userAgents when that list
// is not empty.
//
// It returns an error when the file cannot be opened or read, when the form
// cannot be written, or when the request cannot be constructed.
func (s *Searcher) buildRequest(image string) (*http.Request, error) {
	// Open the image first so an unreadable file fails before any form data
	// is assembled.
	f, err := os.Open(image)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var buff bytes.Buffer
	writer := multipart.NewWriter(&buff)

	// Basic form fields.
	fields := [][2]string{
		{"image_url", ""},
		{"image_content", ""},
		{"filename", ""},
		{"hl", "en"},
	}
	for _, kv := range fields {
		if err := writer.WriteField(kv[0], kv[1]); err != nil {
			return nil, err
		}
	}

	// Image file part.
	w, err := writer.CreateFormFile("encoded_image", "")
	if err != nil {
		return nil, err
	}
	if _, err := io.Copy(w, f); err != nil {
		return nil, err
	}
	// Close writes the trailing boundary; it must happen before the buffer is
	// handed to the request.
	if err := writer.Close(); err != nil {
		return nil, err
	}

	req, err := http.NewRequest("POST", s.url.String(), &buff)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Host", s.url.Host)
	req.Header.Set("Origin", s.url.String())
	req.Header.Set("Content-Type", writer.FormDataContentType())
	// rand.Intn panics for n <= 0, so only pick a User-Agent when there is
	// at least one to choose from.
	if n := len(s.userAgents); n > 0 {
		req.Header.Set("User-Agent", s.userAgents[rand.Intn(n)])
	}
	return req, nil
}
// getBase64ImageData extracts every inline base64 image found in html using
// the package-level pattern. Each result carries the image type (submatch 1)
// and the still-encoded payload (submatch 2). It returns the error
// "no matches" when the page contains no inline image.
func (s *Searcher) getBase64ImageData(html []byte) ([]image, error) {
	matches := pattern.FindAllSubmatch(html, -1)
	if len(matches) == 0 {
		return nil, errors.New("no matches")
	}

	images := make([]image, len(matches))
	for i, m := range matches {
		images[i] = image{typ: string(m[1]), data: m[2]}
	}
	return images, nil
}
// decodeBase64 decodes base64-encoded image data and writes the result to
// filename (mode 0666). wg.Done is called when the method returns, so the
// caller must have called wg.Add(1) beforehand.
//
// Every literal `\x3d` sequence in data is first replaced by "=", restoring
// padding characters that arrive escaped. Only the bytes actually produced by
// the decoder are written; the scratch buffer is not dumped in full, which
// would append zero bytes to every file.
//
// Errors are logged as well as returned because the method is meant to be
// started with `go`, where the return value is dropped.
func (s *Searcher) decodeBase64(data []byte, filename string, wg *sync.WaitGroup) error {
	defer wg.Done()
	data = bytes.Replace(data, []byte("\\x3d"), []byte("="), -1)

	// DecodedLen is an upper bound; Decode reports the exact count in n.
	binary := make([]byte, base64.StdEncoding.DecodedLen(len(data)))
	n, err := base64.StdEncoding.Decode(binary, data)
	if err != nil {
		s.log.Println("decode base64 data error: ", err)
		return err
	}

	// Write the decoded image to disk.
	err = ioutil.WriteFile(filename, binary[:n], 0666)
	if err != nil {
		s.log.Println("write file error: ", err)
		return err
	}
	fmt.Printf("写入图片文件 %s 成功\n", filename)
	return nil
}
// walkFunc is the filepath.WalkFunc handed to filepath.Walk by Run. For every
// non-directory entry it starts a goroutine, tracked by the package-level wg,
// that uploads the file, extracts the inline base64 images from the response
// and writes each of them to disk through decodeBase64.
//
// The upload is attempted at most s.maxRetryTimes times, and a result obtained
// on any attempt, including the last one, is used. Images found for
// <upload>/<sub>/<name>.<ext> are written to
// <download>/<sub>/<name>/<i>.<type>, with i starting at 1.
//
// walkFunc always returns nil, so a failing entry never aborts the walk.
func (s *Searcher) walkFunc(path string, info os.FileInfo, err error) error {
	// When Walk reports an error for path, info may be nil; skip the entry
	// instead of dereferencing it.
	if err != nil {
		s.log.Println("walk error: ", path, err)
		return nil
	}
	if info.IsDir() {
		return nil
	}

	wg.Add(1)
	fmt.Println("upload file: ", info.Name())
	go func() {
		defer wg.Done()

		var imagesData []image
		for attempt := 1; attempt <= s.maxRetryTimes; attempt++ {
			// NOTE(review): this pause is at most 19 microseconds; confirm
			// whether milliseconds were intended.
			time.Sleep(time.Microsecond * time.Duration(rand.Int31n(20)))
			fmt.Printf("第 %d 次尝试上传图片 %s \n", attempt, info.Name())

			req, err := s.buildRequest(path)
			if err != nil {
				// Building the request only depends on the local file, so
				// retrying would not help.
				s.log.Println("build request error: ", err)
				return
			}
			html, err := s.SendRequest(req)
			if err != nil {
				s.log.Println(err)
				continue
			}
			imagesData, err = s.getBase64ImageData(html)
			if err != nil {
				continue
			}
			if len(imagesData) != 0 {
				break
			}
		}
		// Decide on the outcome, not on the attempt counter, so a success on
		// the final attempt is not thrown away.
		if len(imagesData) == 0 {
			s.log.Println("max retry times to upload file ", path)
			return
		}
		fmt.Printf("图片 %s 上传成功\n", info.Name())

		// Directory named after the file (extension stripped), next to it.
		filename := filepath.Base(info.Name())
		src := filepath.Join(filepath.Dir(path), strings.TrimSuffix(filename, filepath.Ext(filename)))
		// Mirror that directory under the download root. Rel handles roots
		// such as "./upload" or "upload/" that plain substring replacement
		// maps incorrectly; the replacement is kept as a fallback.
		dir := strings.Replace(src, s.upload, s.download, 1)
		if rel, relErr := filepath.Rel(s.upload, src); relErr == nil {
			dir = filepath.Join(s.download, rel)
		}
		// Directories need the execute bit to be usable; MkdirAll is a no-op
		// when dir already exists, so it is done once for all images.
		if er := os.MkdirAll(dir, 0755); er != nil {
			// Log and give up on this file only; exiting the whole process
			// from a worker goroutine would abort every other upload.
			s.log.Printf("create path %s error: %s", dir, er.Error())
			return
		}

		for i, img := range imagesData {
			wg.Add(1)
			go s.decodeBase64(img.data, filepath.Join(dir, fmt.Sprintf("%d.%s", i+1, img.typ)), &wg)
		}
	}()
	return nil
}
// Run walks the upload directory with walkFunc, waits on the package-level
// wait group until all started goroutines have finished, and prints the
// elapsed time in whole seconds. The value returned by filepath.Walk is
// discarded.
func (s *Searcher) Run() {
	begin := time.Now()

	filepath.Walk(s.upload, s.walkFunc)
	wg.Wait()

	elapsed := time.Since(begin) / time.Second
	fmt.Printf("Total time: %d s\n", elapsed)
}
// exist reports whether path exists. It returns false only when os.Stat fails
// with an error matching os.ErrNotExist; any other Stat outcome, including
// other errors, yields true.
func (s Searcher) exist(path string) bool {
	if _, statErr := os.Stat(path); errors.Is(statErr, os.ErrNotExist) {
		return false
	}
	return true
}