fix wayback API
lc committed May 14, 2024
1 parent d556483 commit 69833dd
Showing 9 changed files with 34 additions and 51 deletions.
14 changes: 5 additions & 9 deletions cmd/gau/main.go
@@ -31,10 +31,10 @@ func main() {
 
 	results := make(chan string)
 
-	var out = os.Stdout
+	out := os.Stdout
 	// Handle results in background
 	if config.Output != "" {
-		out, err = os.OpenFile(config.Output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+		out, err = os.OpenFile(config.Output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
 		if err != nil {
 			log.Fatalf("Could not open output file: %v\n", err)
 		}
@@ -54,7 +54,6 @@ func main() {
 
 	workChan := make(chan runner.Work)
 	gau.Start(workChan, results)
-
 	domains := flags.Args()
 	if len(domains) > 0 {
 		for _, provider := range gau.Providers {
@@ -67,15 +66,12 @@
 		for _, provider := range gau.Providers {
 			for sc.Scan() {
 				workChan <- runner.NewWork(sc.Text(), provider)
-
-				if err := sc.Err(); err != nil {
-					log.Fatal(err)
-				}
 			}
+			if err := sc.Err(); err != nil {
+				log.Fatal(err)
+			}
 		}
-
 	}
-
 	close(workChan)
 
 	// wait for providers to fetch URLS
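The main.go change above drops the sc.Err() check from inside the read loop and performs it once after the loop, which matches the documented bufio.Scanner contract: Err only reports a non-EOF error after Scan has returned false. A minimal standalone sketch of that pattern (the stdin processing here is illustrative, not gau's worker setup):

package main

import (
	"bufio"
	"log"
	"os"
)

func main() {
	sc := bufio.NewScanner(os.Stdin)
	for sc.Scan() {
		// process each input line (e.g. hand it off to a worker channel)
		_ = sc.Text()
	}
	// Err is only meaningful once Scan has returned false,
	// so a single check after the loop is sufficient.
	if err := sc.Err(); err != nil {
		log.Fatal(err)
	}
}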
16 changes: 10 additions & 6 deletions pkg/httpclient/client.go
@@ -2,13 +2,17 @@ package httpclient
 
 import (
 	"errors"
-	"github.com/valyala/fasthttp"
 	"math/rand"
 	"time"
+
+	"github.com/valyala/fasthttp"
 )
 
-var ErrNilResponse = errors.New("unexpected nil response")
-var ErrNon200Response = errors.New("API responded with non-200 status code")
+var (
+	ErrNilResponse    = errors.New("unexpected nil response")
+	ErrNon200Response = errors.New("API responded with non-200 status code")
+	ErrBadRequest     = errors.New("API responded with 400 status code")
+)
 
 type Header struct {
 	Key string
@@ -54,9 +58,11 @@ func doReq(c *fasthttp.Client, req *fasthttp.Request, timeout uint) ([]byte, error) {
 		return nil, err
 	}
 	if resp.StatusCode() != 200 {
+		if resp.StatusCode() == 400 {
+			return nil, ErrBadRequest
+		}
 		return nil, ErrNon200Response
 	}
-
 	if resp.Body() == nil {
 		return nil, ErrNilResponse
 	}
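The doReq change above gives 400 responses their own sentinel, so callers can tell "no more data" apart from a generic non-200 failure with errors.Is. A minimal sketch of that check, using local stand-ins for the sentinel and for MakeRequest rather than the real gau API:

package main

import (
	"errors"
	"fmt"
)

// errBadRequest mirrors the sentinel added in pkg/httpclient above.
var errBadRequest = errors.New("API responded with 400 status code")

// request is a hypothetical stand-in for httpclient.MakeRequest.
func request() ([]byte, error) {
	return nil, errBadRequest
}

func main() {
	if _, err := request(); err != nil {
		if errors.Is(err, errBadRequest) {
			fmt.Println("400 from the API: stop paginating instead of treating it as a failure")
			return
		}
		fmt.Println("request failed:", err)
	}
}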
@@ -79,9 +85,7 @@ func getUserAgent() string {
 		"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
 	}
 
-	rand.Seed(time.Now().UnixNano())
 	randomIndex := rand.Intn(len(payload))
-
 	pick := payload[randomIndex]
 
 	return pick
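Dropping rand.Seed in getUserAgent is in line with current Go: since Go 1.20 the global math/rand source is seeded automatically and rand.Seed is deprecated, so a random pick needs no setup. A minimal sketch (the slice contents are placeholders):

package main

import (
	"fmt"
	"math/rand"
)

func main() {
	payload := []string{"UA-1", "UA-2", "UA-3"}
	// No rand.Seed needed: the global source is auto-seeded since Go 1.20.
	pick := payload[rand.Intn(len(payload))]
	fmt.Println(pick)
}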
7 changes: 4 additions & 3 deletions pkg/output/output.go
@@ -1,13 +1,14 @@
 package output
 
 import (
-	mapset "github.com/deckarep/golang-set/v2"
-	jsoniter "github.com/json-iterator/go"
-	"github.com/valyala/bytebufferpool"
 	"io"
 	"net/url"
 	"path"
 	"strings"
+
+	mapset "github.com/deckarep/golang-set/v2"
+	jsoniter "github.com/json-iterator/go"
+	"github.com/valyala/bytebufferpool"
 )
 
 type JSONResult struct {
2 changes: 1 addition & 1 deletion pkg/providers/otx/otx.go
@@ -3,6 +3,7 @@ package otx
 import (
 	"context"
 	"fmt"
+
 	"github.com/bobesa/go-domain-util/domainutil"
 	jsoniter "github.com/json-iterator/go"
 	"github.com/lc/gau/v2/pkg/httpclient"
@@ -84,7 +85,6 @@ func (c *Client) formatURL(domain string, page uint) string {
 	}
 
 	return fmt.Sprintf("%sapi/v1/indicators/%s/%s/url_list?limit=100&page=%d", _BaseURL, category, domain, page)
-
 }
 
 var _BaseURL = "https://otx.alienvault.com/"
1 change: 1 addition & 0 deletions pkg/providers/providers.go
@@ -2,6 +2,7 @@ package providers
 
 import (
 	"context"
+
 	mapset "github.com/deckarep/golang-set/v2"
 	"github.com/valyala/fasthttp"
 )
4 changes: 1 addition & 3 deletions pkg/providers/urlscan/types.go
@@ -4,9 +4,7 @@ import (
 	"strings"
 )
 
-var (
-	_BaseURL = "https://urlscan.io/"
-)
+var _BaseURL = "https://urlscan.io/"
 
 type apiResponse struct {
 	Status int `json:"status"`
4 changes: 3 additions & 1 deletion pkg/providers/urlscan/urlscan.go
@@ -4,11 +4,12 @@ import (
 	"bytes"
 	"context"
 	"fmt"
+	"strings"
+
 	jsoniter "github.com/json-iterator/go"
 	"github.com/lc/gau/v2/pkg/httpclient"
 	"github.com/lc/gau/v2/pkg/providers"
 	"github.com/sirupsen/logrus"
-	"strings"
 )
 
 const (
@@ -30,6 +31,7 @@ func New(c *providers.Config) *Client {
 func (c *Client) Name() string {
 	return Name
 }
+
 func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
 	var searchAfter string
 	var header httpclient.Header
34 changes: 7 additions & 27 deletions pkg/providers/wayback/wayback.go
@@ -2,7 +2,9 @@ package wayback
 
 import (
 	"context"
+	"errors"
 	"fmt"
+
 	jsoniter "github.com/json-iterator/go"
 	"github.com/lc/gau/v2/pkg/httpclient"
 	"github.com/lc/gau/v2/pkg/providers"
@@ -36,12 +38,7 @@ type waybackResult [][]string
 // Fetch fetches all urls for a given domain and sends them to a channel.
 // It returns an error should one occur.
 func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
-	pages, err := c.getPagination(domain)
-	if err != nil {
-		return fmt.Errorf("failed to fetch wayback pagination: %s", err)
-	}
-
-	for page := uint(0); page < pages; page++ {
+	for page := uint(0); ; page++ {
 		select {
 		case <-ctx.Done():
 			return nil
@@ -51,9 +48,11 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
 			// make HTTP request
 			resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout)
 			if err != nil {
+				if errors.Is(err, httpclient.ErrBadRequest) {
+					return nil
+				}
 				return fmt.Errorf("failed to fetch wayback results page %d: %s", page, err)
 			}
-
 			var result waybackResult
 			if err = jsoniter.Unmarshal(resp, &result); err != nil {
 				return fmt.Errorf("failed to decode wayback results for page %d: %s", page, err)
@@ -72,7 +71,6 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error {
 			}
 		}
 	}
-	return nil
 }
 
 // formatUrl returns a formatted URL for the Wayback API
@@ -82,25 +80,7 @@ func (c *Client) formatURL(domain string, page uint) string {
 	}
 	filterParams := c.filters.GetParameters(true)
 	return fmt.Sprintf(
-		"https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&page=%d",
+		"https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&pageSize=100&page=%d",
 		domain, page,
 	) + filterParams
 }
-
-// getPagination returns the number of pages for Wayback
-func (c *Client) getPagination(domain string) (uint, error) {
-	url := fmt.Sprintf("%s&showNumPages=true", c.formatURL(domain, 0))
-	resp, err := httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout)
-
-	if err != nil {
-		return 0, err
-	}
-
-	var paginationResult uint
-
-	if err = jsoniter.Unmarshal(resp, &paginationResult); err != nil {
-		return 0, err
-	}
-
-	return paginationResult, nil
-}
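Taken together, the wayback changes replace the up-front page count (the removed getPagination call to showNumPages) with an open-ended loop that requests successive pages until the CDX API rejects the out-of-range page, which the new httpclient.ErrBadRequest check turns into a clean stop. A rough standalone sketch of that termination logic, with a hypothetical fetchCDXPage standing in for the real HTTP request:

package main

import (
	"errors"
	"fmt"
)

var errBadRequest = errors.New("API responded with 400 status code")

// fetchCDXPage stands in for requesting one page of CDX results;
// past the last page it returns errBadRequest.
func fetchCDXPage(page uint) ([]string, error) {
	if page >= 2 {
		return nil, errBadRequest
	}
	return []string{fmt.Sprintf("http://example.com/%d", page)}, nil
}

func collect() ([]string, error) {
	var urls []string
	for page := uint(0); ; page++ {
		rows, err := fetchCDXPage(page)
		if err != nil {
			if errors.Is(err, errBadRequest) {
				// Out of pages: stop paginating, this is not a failure.
				return urls, nil
			}
			return nil, fmt.Errorf("failed to fetch wayback results page %d: %w", page, err)
		}
		urls = append(urls, rows...)
	}
}

func main() {
	urls, err := collect()
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println(urls)
}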
3 changes: 2 additions & 1 deletion runner/runner.go
@@ -3,13 +3,14 @@ package runner
 import (
 	"context"
 	"fmt"
+	"sync"
+
 	"github.com/lc/gau/v2/pkg/providers"
 	"github.com/lc/gau/v2/pkg/providers/commoncrawl"
 	"github.com/lc/gau/v2/pkg/providers/otx"
 	"github.com/lc/gau/v2/pkg/providers/urlscan"
 	"github.com/lc/gau/v2/pkg/providers/wayback"
 	"github.com/sirupsen/logrus"
-	"sync"
 )
 
 type Runner struct {
