-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathinvestigate.go
51 lines (38 loc) · 955 Bytes
/
investigate.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
package articletext
/*
This file contains a function that investigates a list of URLs and chooses the optimal
path (selector) to use later for quickly extracting text from an HTML document.
Author: Roman Gelembjuk <roman@gelembjuk.com>
*/
import (
"errors"
)
// getOptimalArticleSignatureByUrls finds a path (selector, signature) for each
// url and returns the one that was found most often.
//
// An error is returned when urls is empty, or as soon as
// GetArticleSignatureFromUrl reports an error for any url; in both cases the
// returned string is empty.
//
// When several signatures share the highest count, the one encountered first
// in urls is returned, so the result is deterministic for a given input.
func getOptimalArticleSignatureByUrls(urls []string) (string, error) {
	if len(urls) < 1 {
		return "", errors.New("No urls provided")
	}

	counts := make(map[string]int, len(urls))
	// order records each distinct signature in first-seen order. Ranging over
	// the map directly would make tie-breaking depend on Go's unspecified map
	// iteration order.
	order := make([]string, 0, len(urls))

	for _, url := range urls {
		path, err := GetArticleSignatureFromUrl(url)
		if err != nil {
			return "", err
		}
		if _, seen := counts[path]; !seen {
			order = append(order, path)
		}
		counts[path]++
	}

	// Find the path with the maximum number of occurrences. The strict
	// comparison keeps the earliest-seen path when counts are equal.
	maxpath := ""
	maxval := 0
	for _, path := range order {
		if n := counts[path]; n > maxval {
			maxval = n
			maxpath = path
		}
	}

	return maxpath, nil
}