From 69833dd12b1672f02fb2562d673b3c94b44595de Mon Sep 17 00:00:00 2001 From: lc Date: Tue, 14 May 2024 08:11:19 -0500 Subject: [PATCH] fix wayback API --- cmd/gau/main.go | 14 +++++-------- pkg/httpclient/client.go | 16 +++++++++------ pkg/output/output.go | 7 ++++--- pkg/providers/otx/otx.go | 2 +- pkg/providers/providers.go | 1 + pkg/providers/urlscan/types.go | 4 +--- pkg/providers/urlscan/urlscan.go | 4 +++- pkg/providers/wayback/wayback.go | 34 +++++++------------------------- runner/runner.go | 3 ++- 9 files changed, 34 insertions(+), 51 deletions(-) diff --git a/cmd/gau/main.go b/cmd/gau/main.go index bed608b..44fad57 100644 --- a/cmd/gau/main.go +++ b/cmd/gau/main.go @@ -31,10 +31,10 @@ func main() { results := make(chan string) - var out = os.Stdout + out := os.Stdout // Handle results in background if config.Output != "" { - out, err = os.OpenFile(config.Output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + out, err = os.OpenFile(config.Output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644) if err != nil { log.Fatalf("Could not open output file: %v\n", err) } @@ -54,7 +54,6 @@ func main() { workChan := make(chan runner.Work) gau.Start(workChan, results) - domains := flags.Args() if len(domains) > 0 { for _, provider := range gau.Providers { @@ -67,15 +66,12 @@ func main() { for _, provider := range gau.Providers { for sc.Scan() { workChan <- runner.NewWork(sc.Text(), provider) - - if err := sc.Err(); err != nil { - log.Fatal(err) - } + } + if err := sc.Err(); err != nil { + log.Fatal(err) } } - } - close(workChan) // wait for providers to fetch URLS diff --git a/pkg/httpclient/client.go b/pkg/httpclient/client.go index 944c613..cbae139 100644 --- a/pkg/httpclient/client.go +++ b/pkg/httpclient/client.go @@ -2,13 +2,17 @@ package httpclient import ( "errors" - "github.com/valyala/fasthttp" "math/rand" "time" + + "github.com/valyala/fasthttp" ) -var ErrNilResponse = errors.New("unexpected nil response") -var ErrNon200Response = errors.New("API responded with non-200 status code") +var ( + ErrNilResponse = errors.New("unexpected nil response") + ErrNon200Response = errors.New("API responded with non-200 status code") + ErrBadRequest = errors.New("API responded with 400 status code") +) type Header struct { Key string @@ -54,9 +58,11 @@ func doReq(c *fasthttp.Client, req *fasthttp.Request, timeout uint) ([]byte, err return nil, err } if resp.StatusCode() != 200 { + if resp.StatusCode() == 400 { + return nil, ErrBadRequest + } return nil, ErrNon200Response } - if resp.Body() == nil { return nil, ErrNilResponse } @@ -79,9 +85,7 @@ func getUserAgent() string { "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)", } - rand.Seed(time.Now().UnixNano()) randomIndex := rand.Intn(len(payload)) - pick := payload[randomIndex] return pick diff --git a/pkg/output/output.go b/pkg/output/output.go index fa2c1bc..6b69475 100644 --- a/pkg/output/output.go +++ b/pkg/output/output.go @@ -1,13 +1,14 @@ package output import ( - mapset "github.com/deckarep/golang-set/v2" - jsoniter "github.com/json-iterator/go" - "github.com/valyala/bytebufferpool" "io" "net/url" "path" "strings" + + mapset "github.com/deckarep/golang-set/v2" + jsoniter "github.com/json-iterator/go" + "github.com/valyala/bytebufferpool" ) type JSONResult struct { diff --git a/pkg/providers/otx/otx.go b/pkg/providers/otx/otx.go index 33dbbd5..ae7839c 100644 --- a/pkg/providers/otx/otx.go +++ b/pkg/providers/otx/otx.go @@ -3,6 +3,7 @@ package otx import ( "context" "fmt" + "github.com/bobesa/go-domain-util/domainutil" jsoniter "github.com/json-iterator/go" "github.com/lc/gau/v2/pkg/httpclient" @@ -84,7 +85,6 @@ func (c *Client) formatURL(domain string, page uint) string { } return fmt.Sprintf("%sapi/v1/indicators/%s/%s/url_list?limit=100&page=%d", _BaseURL, category, domain, page) - } var _BaseURL = "https://otx.alienvault.com/" diff --git a/pkg/providers/providers.go b/pkg/providers/providers.go index 9e5b283..942ac1c 100644 --- a/pkg/providers/providers.go +++ b/pkg/providers/providers.go @@ -2,6 +2,7 @@ package providers import ( "context" + mapset "github.com/deckarep/golang-set/v2" "github.com/valyala/fasthttp" ) diff --git a/pkg/providers/urlscan/types.go b/pkg/providers/urlscan/types.go index 06619f0..395650b 100644 --- a/pkg/providers/urlscan/types.go +++ b/pkg/providers/urlscan/types.go @@ -4,9 +4,7 @@ import ( "strings" ) -var ( - _BaseURL = "https://urlscan.io/" -) +var _BaseURL = "https://urlscan.io/" type apiResponse struct { Status int `json:"status"` diff --git a/pkg/providers/urlscan/urlscan.go b/pkg/providers/urlscan/urlscan.go index bd10204..a0126dc 100644 --- a/pkg/providers/urlscan/urlscan.go +++ b/pkg/providers/urlscan/urlscan.go @@ -4,11 +4,12 @@ import ( "bytes" "context" "fmt" + "strings" + jsoniter "github.com/json-iterator/go" "github.com/lc/gau/v2/pkg/httpclient" "github.com/lc/gau/v2/pkg/providers" "github.com/sirupsen/logrus" - "strings" ) const ( @@ -30,6 +31,7 @@ func New(c *providers.Config) *Client { func (c *Client) Name() string { return Name } + func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { var searchAfter string var header httpclient.Header diff --git a/pkg/providers/wayback/wayback.go b/pkg/providers/wayback/wayback.go index e458e7d..156adcd 100644 --- a/pkg/providers/wayback/wayback.go +++ b/pkg/providers/wayback/wayback.go @@ -2,7 +2,9 @@ package wayback import ( "context" + "errors" "fmt" + jsoniter "github.com/json-iterator/go" "github.com/lc/gau/v2/pkg/httpclient" "github.com/lc/gau/v2/pkg/providers" @@ -36,12 +38,7 @@ type waybackResult [][]string // Fetch fetches all urls for a given domain and sends them to a channel. // It returns an error should one occur. func (c *Client) Fetch(ctx context.Context, domain string, results chan string) error { - pages, err := c.getPagination(domain) - if err != nil { - return fmt.Errorf("failed to fetch wayback pagination: %s", err) - } - - for page := uint(0); page < pages; page++ { + for page := uint(0); ; page++ { select { case <-ctx.Done(): return nil @@ -51,9 +48,11 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string) // make HTTP request resp, err := httpclient.MakeRequest(c.config.Client, apiURL, c.config.MaxRetries, c.config.Timeout) if err != nil { + if errors.Is(err, httpclient.ErrBadRequest) { + return nil + } return fmt.Errorf("failed to fetch wayback results page %d: %s", page, err) } - var result waybackResult if err = jsoniter.Unmarshal(resp, &result); err != nil { return fmt.Errorf("failed to decode wayback results for page %d: %s", page, err) @@ -72,7 +71,6 @@ func (c *Client) Fetch(ctx context.Context, domain string, results chan string) } } } - return nil } // formatUrl returns a formatted URL for the Wayback API @@ -82,25 +80,7 @@ func (c *Client) formatURL(domain string, page uint) string { } filterParams := c.filters.GetParameters(true) return fmt.Sprintf( - "https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&page=%d", + "https://web.archive.org/cdx/search/cdx?url=%s/*&output=json&collapse=urlkey&fl=original&pageSize=100&page=%d", domain, page, ) + filterParams } - -// getPagination returns the number of pages for Wayback -func (c *Client) getPagination(domain string) (uint, error) { - url := fmt.Sprintf("%s&showNumPages=true", c.formatURL(domain, 0)) - resp, err := httpclient.MakeRequest(c.config.Client, url, c.config.MaxRetries, c.config.Timeout) - - if err != nil { - return 0, err - } - - var paginationResult uint - - if err = jsoniter.Unmarshal(resp, &paginationResult); err != nil { - return 0, err - } - - return paginationResult, nil -} diff --git a/runner/runner.go b/runner/runner.go index a5efa79..730aaba 100644 --- a/runner/runner.go +++ b/runner/runner.go @@ -3,13 +3,14 @@ package runner import ( "context" "fmt" + "sync" + "github.com/lc/gau/v2/pkg/providers" "github.com/lc/gau/v2/pkg/providers/commoncrawl" "github.com/lc/gau/v2/pkg/providers/otx" "github.com/lc/gau/v2/pkg/providers/urlscan" "github.com/lc/gau/v2/pkg/providers/wayback" "github.com/sirupsen/logrus" - "sync" ) type Runner struct {