|
| 1 | +// Copyright 2018 The Gofrs |
| 2 | +// Use of this source code is governed by an Apache License |
| 3 | +// license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +// finder is a command line tool (CLI) used to find stale projects |
| 6 | +// on GitHub (those without recent commits, issues, etc.) and rank |
| 7 | +// them by their godoc import counts. |
| 8 | +// |
| 9 | +// Godoc.org import counts are public and computed by godoc.org as |
| 10 | +// it indexes the public Go repositories. |
| 11 | +package main // import "github.com/gofrs/help-requests/cmd/finder" |
| 12 | + |
| 13 | +import ( |
| 14 | + "context" |
| 15 | + "errors" |
| 16 | + "flag" |
| 17 | + "fmt" |
| 18 | + "math" |
| 19 | + "net/http" |
| 20 | + "os" |
| 21 | + "sort" |
| 22 | + "strconv" |
| 23 | + "strings" |
| 24 | + "text/tabwriter" |
| 25 | + "time" |
| 26 | + |
| 27 | + "github.com/google/go-github/github" |
| 28 | + "golang.org/x/net/html" |
| 29 | + "golang.org/x/oauth2" |
| 30 | +) |
| 31 | + |
// flagCount is the -count command line flag: the number of GitHub
// repositories to request from the search API. It defaults to 25.
var flagCount = flag.Int("count", 25, "How many (Github) projects to lookup")
| 35 | + |
| 36 | +func main() { |
| 37 | + flag.Parse() |
| 38 | + |
| 39 | + ghClient, err := createGithubClient(context.Background()) |
| 40 | + if err != nil { |
| 41 | + fmt.Fprintf(os.Stderr, "ERROR: problem creating github client: %v", err) |
| 42 | + } |
| 43 | + |
| 44 | + opts := github.ListOptions{ |
| 45 | + PerPage: *flagCount, |
| 46 | + Page: 1, |
| 47 | + } |
| 48 | + |
| 49 | + query := "stars:>100 pushed:<2018-01-01 language:Go" |
| 50 | + repoRes, res, err := ghClient.Search.Repositories(context.Background(), query, &github.SearchOptions{ |
| 51 | + Sort: "stars", |
| 52 | + Order: "desc", |
| 53 | + ListOptions: opts, |
| 54 | + }) |
| 55 | + if err != nil { |
| 56 | + fmt.Fprintf(os.Stderr, "ERROR: problem reading github repositories: %v", err) |
| 57 | + } |
| 58 | + res.Close = true |
| 59 | + |
| 60 | + type row struct { |
| 61 | + text string |
| 62 | + importCount int |
| 63 | + } |
| 64 | + var rows []row |
| 65 | + for i := range repoRes.Repositories { |
| 66 | + repo := repoRes.Repositories[i] |
| 67 | + |
| 68 | + cleanName := strings.Replace(*repo.HTMLURL, `https://`, "", 1) |
| 69 | + |
| 70 | + // TODO(adam): goroutines + sync.WaitGroup |
| 71 | + importers, err := scrapeGodocImports(cleanName) |
| 72 | + if err != nil { |
| 73 | + fmt.Fprintf(os.Stderr, "ERROR: problem grabbing %s godoc importers: %v\n", cleanName, err) |
| 74 | + } |
| 75 | + |
| 76 | + days := int(math.Abs(float64(repo.PushedAt.Sub(time.Now()).Hours()) / 24.0)) |
| 77 | + line := fmt.Sprintf("%s\t%d\t%d\t%d\n", cleanName, *repo.StargazersCount, days, importers) |
| 78 | + rows = append(rows, row{ |
| 79 | + text: line, |
| 80 | + importCount: importers, |
| 81 | + }) |
| 82 | + } |
| 83 | + sort.Slice(rows, func(i, j int) bool { return rows[i].importCount < rows[j].importCount }) |
| 84 | + |
| 85 | + // Write (sorted) output |
| 86 | + w := tabwriter.NewWriter(os.Stdout, 0, 0, 1, ' ', 0) |
| 87 | + fmt.Fprintf(w, "name\tstars\tlast commit (days)\timporters\n") |
| 88 | + defer w.Flush() |
| 89 | + for i := range rows { |
| 90 | + // we're going to write the rows in reverse |
| 91 | + // this will output them in desc order |
| 92 | + fmt.Fprintf(w, rows[len(rows)-i-1].text) |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +func createGithubClient(ctx context.Context) (*github.Client, error) { |
| 97 | + v := os.Getenv("GITHUB_TOKEN") |
| 98 | + if v == "" { |
| 99 | + return nil, errors.New("environment variable GITHUB_TOKEN is required") |
| 100 | + } |
| 101 | + ts := oauth2.StaticTokenSource(&oauth2.Token{ |
| 102 | + AccessToken: v, |
| 103 | + }) |
| 104 | + tc := oauth2.NewClient(ctx, ts) |
| 105 | + return github.NewClient(tc), nil |
| 106 | +} |
| 107 | + |
| 108 | +func scrapeGodocImports(importPath string) (int, error) { |
| 109 | + req, err := http.NewRequest("GET", "https://godoc.org/"+importPath, nil) |
| 110 | + if err != nil { |
| 111 | + return -1, fmt.Errorf("problem loading godoc.org: %v", err) |
| 112 | + } |
| 113 | + req.Header.Set("User-Agent", "Gofrs popstalerepo bot") |
| 114 | + |
| 115 | + resp, err := http.DefaultClient.Do(req) |
| 116 | + if err != nil { |
| 117 | + return -1, fmt.Errorf("problem loading %s: %v", req.URL, err) |
| 118 | + } |
| 119 | + defer resp.Body.Close() |
| 120 | + |
| 121 | + // recursive search, from /x/net/html docs |
| 122 | + var f func(n *html.Node) (int, error) |
| 123 | + f = func(n *html.Node) (int, error) { |
| 124 | + if n.Type == html.ElementNode && n.Data == "a" { |
| 125 | + for _, a := range n.Attr { |
| 126 | + // TODO(adam): we should try and refresh importers |
| 127 | + // when running into errors. |
| 128 | + if a.Key == "href" && strings.Contains(a.Val, "?importers") { |
| 129 | + parts := strings.Fields(n.FirstChild.Data) |
| 130 | + n, err := strconv.Atoi(parts[0]) |
| 131 | + if err != nil { |
| 132 | + return -1, fmt.Errorf("couldn't parse %q: %v", parts[0], err) |
| 133 | + } |
| 134 | + return n, nil |
| 135 | + } |
| 136 | + } |
| 137 | + } |
| 138 | + for c := n.FirstChild; c != nil; c = c.NextSibling { |
| 139 | + n, err := f(c) |
| 140 | + if err == nil && n > 0 { |
| 141 | + return n, err |
| 142 | + } |
| 143 | + } |
| 144 | + return -1, errors.New(`didn't find <a href="?importers">`) |
| 145 | + } |
| 146 | + |
| 147 | + doc, err := html.Parse(resp.Body) |
| 148 | + if err != nil { |
| 149 | + return -1, fmt.Errorf("couldn't parse html: %v", err) |
| 150 | + } |
| 151 | + return f(doc) |
| 152 | +} |
0 commit comments