From fb8ae789bd973266ca2b36b624b3f67124758ce8 Mon Sep 17 00:00:00 2001 From: Alexander Sheiko Date: Tue, 30 May 2023 17:31:00 +0300 Subject: [PATCH] Add LoginOpenAccount Close #101 again --- README.md | 12 +++- api.go | 6 +- auth.go | 164 ++++++++++++++++++++++++++++++++++++++++++++++--- auth_test.go | 7 +++ go.mod | 3 +- go.sum | 26 ++++++-- scraper.go | 15 +++++ search.go | 4 +- search_test.go | 19 +----- tweets.go | 2 +- 10 files changed, 224 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index ab35c08..08e8d62 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ import ( func main() { scraper := twitterscraper.New() - err := scraper.Login(username, password) + err := scraper.LoginOpenAccount() if err !== nil { panic(err) } @@ -183,6 +183,8 @@ func main() { Some specified user tweets are protected that you must login and follow. It is also required to search. +#### Login + ```golang err := scraper.Login("username", "password") ``` @@ -232,6 +234,14 @@ scraper.SetCookies(cookies) scraper.IsLoggedIn() ``` +#### Open account + +If you don't want to use your account, you can login as a Twitter app: + +```golang +err := scraper.LoginOpenAccount() +``` + ### Use Proxy Support HTTP(s) and SOCKS5 proxy diff --git a/api.go b/api.go index 25b065f..64ad37a 100644 --- a/api.go +++ b/api.go @@ -33,7 +33,11 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error { req.Header.Set("X-Guest-Token", s.guestToken) } - req.Header.Set("Authorization", "Bearer "+s.bearerToken) + if s.oAuthToken != "" && s.oAuthSecret != "" { + req.Header.Set("Authorization", s.sign(req.Method, req.URL)) + } else { + req.Header.Set("Authorization", "Bearer "+s.bearerToken) + } for _, cookie := range s.client.Jar.Cookies(req.URL) { if cookie.Name == "ct0" { diff --git a/auth.go b/auth.go index 696d998..4c37699 100644 --- a/auth.go +++ b/auth.go @@ -2,17 +2,27 @@ package twitterscraper import ( "bytes" + "crypto/hmac" + "crypto/sha1" + "encoding/base64" "encoding/json" "fmt" + "io" "net/http" "net/http/cookiejar" + "net/url" + "strconv" "strings" + "time" ) const ( - loginURL = "https://api.twitter.com/1.1/onboarding/task.json" - logoutURL = "https://api.twitter.com/1.1/account/logout.json" - bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" + loginURL = "https://api.twitter.com/1.1/onboarding/task.json" + logoutURL = "https://api.twitter.com/1.1/account/logout.json" + oAuthURL = "https://api.twitter.com/oauth2/token" + bearerToken2 = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" + appConsumerKey = "3nVuSoBZnx6U4vzUxf5w" + appConsumerSecret = "Bcs59EFbbsdF6Sl9Ng71smgStWEGwXXKSjYvPVt7qys" ) type ( @@ -24,7 +34,11 @@ type ( FlowToken string `json:"flow_token"` Status string `json:"status"` Subtasks []struct { - SubtaskID string `json:"subtask_id"` + SubtaskID string `json:"subtask_id"` + OpenAccount struct { + OAuthToken string `json:"oauth_token"` + OAuthTokenSecret string `json:"oauth_token_secret"` + } `json:"open_account"` } `json:"subtasks"` } @@ -36,11 +50,39 @@ type ( } ) -func (s *Scraper) getFlowToken(data map[string]interface{}) (string, error) { +func (s *Scraper) getAccessToken(consumerKey, consumerSecret string) (string, error) { + req, err := http.NewRequest("POST", oAuthURL, strings.NewReader("grant_type=client_credentials")) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/x-www-form-urlencoded") + req.SetBasicAuth(consumerKey, consumerSecret) + + res, err := s.client.Do(req) + if err != nil { + return "", err + } + defer res.Body.Close() + + if res.StatusCode != http.StatusOK { + body, _ := io.ReadAll(res.Body) + return "", fmt.Errorf("unexpected status code: %d, body: %s", res.StatusCode, body) + } + + var a struct { + AccessToken string `json:"access_token"` + } + if err := json.NewDecoder(res.Body).Decode(&a); err != nil { + return "", err + } + return a.AccessToken, nil +} + +func (s *Scraper) getFlow(data map[string]interface{}) (*flow, error) { headers := http.Header{ "Authorization": []string{"Bearer " + s.bearerToken}, "Content-Type": []string{"application/json"}, - "User-Agent": []string{"Mozilla/5.0 (Linux; Android 11; Nokia G20) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.88 Mobile Safari/537.36"}, + "User-Agent": []string{"TwitterAndroid/99"}, "X-Guest-Token": []string{s.guestToken}, "X-Twitter-Auth-Type": []string{"OAuth2Client"}, "X-Twitter-Active-User": []string{"yes"}, @@ -49,22 +91,31 @@ func (s *Scraper) getFlowToken(data map[string]interface{}) (string, error) { jsonData, err := json.Marshal(data) if err != nil { - return "", err + return nil, err } req, err := http.NewRequest("POST", loginURL, bytes.NewReader(jsonData)) if err != nil { - return "", err + return nil, err } req.Header = headers resp, err := s.client.Do(req) if err != nil { - return "", err + return nil, err } defer resp.Body.Close() var info flow err = json.NewDecoder(resp.Body).Decode(&info) + if err != nil { + return nil, err + } + + return &info, nil +} + +func (s *Scraper) getFlowToken(data map[string]interface{}) (string, error) { + info, err := s.getFlow(data) if err != nil { return "", err } @@ -230,6 +281,58 @@ func (s *Scraper) Login(credentials ...string) error { return nil } +// LoginOpenAccount as Twitter app +func (s *Scraper) LoginOpenAccount() error { + accessToken, err := s.getAccessToken(appConsumerKey, appConsumerSecret) + if err != nil { + return err + } + s.setBearerToken(accessToken) + + err = s.GetGuestToken() + if err != nil { + return err + } + + // flow start + data := map[string]interface{}{ + "flow_name": "welcome", + "input_flow_data": map[string]interface{}{ + "flow_context": map[string]interface{}{ + "debug_overrides": map[string]interface{}{}, + "start_location": map[string]interface{}{"location": "splash_screen"}, + }, + }, + } + flowToken, err := s.getFlowToken(data) + if err != nil { + return err + } + + // flow next link + data = map[string]interface{}{ + "flow_token": flowToken, + "subtask_inputs": []interface{}{ + map[string]interface{}{ + "subtask_id": "NextTaskOpenLink", + }, + }, + } + info, err := s.getFlow(data) + if err != nil { + return err + } + + if info.Subtasks != nil && len(info.Subtasks) > 0 { + if info.Subtasks[0].SubtaskID == "OpenAccount" { + s.oAuthToken = info.Subtasks[0].OpenAccount.OAuthToken + s.oAuthSecret = info.Subtasks[0].OpenAccount.OAuthTokenSecret + s.isLogged = true + } + } + return nil +} + // Logout is reset session func (s *Scraper) Logout() error { req, err := http.NewRequest("POST", logoutURL, nil) @@ -243,6 +346,8 @@ func (s *Scraper) Logout() error { s.isLogged = false s.guestToken = "" + s.oAuthToken = "" + s.oAuthSecret = "" s.client.Jar, _ = cookiejar.New(nil) s.setBearerToken(bearerToken) return nil @@ -263,3 +368,44 @@ func (s *Scraper) GetCookies() []*http.Cookie { func (s *Scraper) SetCookies(cookies []*http.Cookie) { s.client.Jar.SetCookies(twURL, cookies) } + +func (s *Scraper) sign(method string, ref *url.URL) string { + m := make(map[string]string) + m["oauth_consumer_key"] = appConsumerKey + m["oauth_nonce"] = "0" + m["oauth_signature_method"] = "HMAC-SHA1" + m["oauth_timestamp"] = strconv.FormatInt(time.Now().Unix(), 10) + m["oauth_token"] = s.oAuthToken + + key := []byte(appConsumerSecret + "&" + s.oAuthSecret) + h := hmac.New(sha1.New, key) + + query := ref.Query() + for k, v := range m { + query.Set(k, v) + } + + req := []string{method, ref.Scheme + "://" + ref.Host + ref.Path, query.Encode()} + var reqBuf bytes.Buffer + for _, value := range req { + if reqBuf.Len() > 0 { + reqBuf.WriteByte('&') + } + reqBuf.WriteString(url.QueryEscape(value)) + } + h.Write(reqBuf.Bytes()) + + m["oauth_signature"] = base64.StdEncoding.EncodeToString(h.Sum(nil)) + + var b bytes.Buffer + for k, v := range m { + if b.Len() > 0 { + b.WriteByte(',') + } + b.WriteString(k) + b.WriteByte('=') + b.WriteString(url.QueryEscape(v)) + } + + return "OAuth " + b.String() +} diff --git a/auth_test.go b/auth_test.go index 02a88bd..1d15758 100644 --- a/auth_test.go +++ b/auth_test.go @@ -37,3 +37,10 @@ func TestAuth(t *testing.T) { t.Error("Expected IsLoggedIn() = false") } } + +func TestLoginOpenAccount(t *testing.T) { + scraper := twitterscraper.New() + if err := scraper.LoginOpenAccount(); err != nil { + t.Fatalf("LoginOpenAccount() error = %v", err) + } +} diff --git a/go.mod b/go.mod index d2dad88..1ce3bf3 100644 --- a/go.mod +++ b/go.mod @@ -4,5 +4,6 @@ go 1.16 require ( github.com/google/go-cmp v0.5.9 - golang.org/x/net v0.9.0 + golang.org/x/net v0.10.0 + golang.org/x/oauth2 v0.8.0 ) diff --git a/go.sum b/go.sum index 74a293b..0309e60 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,10 @@ +cloud.google.com/go/compute/metadata v0.2.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= @@ -5,12 +12,15 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM= -golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= +golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/oauth2 v0.8.0 h1:6dkIjl3j3LtZ/O3sTgZTMsLKSftL/B8Zgq4huOIIUu8= +golang.org/x/oauth2 v0.8.0/go.mod h1:yr7u4HXZRm1R1kBWqr/xKNqewf0plRYoB7sla+BCIXE= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -20,12 +30,13 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= @@ -35,3 +46,10 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= +google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= diff --git a/scraper.go b/scraper.go index 1598b71..dc23676 100644 --- a/scraper.go +++ b/scraper.go @@ -23,6 +23,9 @@ type Scraper struct { guestCreatedAt time.Time includeReplies bool isLogged bool + oAuthToken string + oAuthSecret string + proxy string searchMode SearchMode wg sync.WaitGroup } @@ -96,6 +99,16 @@ func (s *Scraper) WithClientTimeout(timeout time.Duration) *Scraper { // set http proxy in the format `http://HOST:PORT` // set socket proxy in the format `socks5://HOST:PORT` func (s *Scraper) SetProxy(proxyAddr string) error { + if proxyAddr == "" { + s.client.Transport = &http.Transport{ + TLSNextProto: make(map[string]func(authority string, c *tls.Conn) http.RoundTripper), + DialContext: (&net.Dialer{ + Timeout: s.client.Timeout, + }).DialContext, + } + s.proxy = "" + return nil + } if strings.HasPrefix(proxyAddr, "http") { urlproxy, err := url.Parse(proxyAddr) if err != nil { @@ -108,6 +121,7 @@ func (s *Scraper) SetProxy(proxyAddr string) error { Timeout: s.client.Timeout, }).DialContext, } + s.proxy = proxyAddr return nil } if strings.HasPrefix(proxyAddr, "socks5") { @@ -128,6 +142,7 @@ func (s *Scraper) SetProxy(proxyAddr string) error { } else { return errors.New("failed type assertion to DialContext") } + s.proxy = proxyAddr return nil } return errors.New("only support http(s) or socks5 protocol") diff --git a/search.go b/search.go index a907d56..3b4ca63 100644 --- a/search.go +++ b/search.go @@ -6,6 +6,8 @@ import ( "strconv" ) +const searchURL = "https://api.twitter.com/2/search/adaptive.json" + // SearchTweets returns channel with tweets for a given search query func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult { return getTweetTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets) @@ -26,7 +28,7 @@ func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*t maxNbr = 50 } - req, err := s.newRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json") + req, err := s.newRequest("GET", searchURL) if err != nil { return nil, err } diff --git a/search_test.go b/search_test.go index 0ba9241..65c42ac 100644 --- a/search_test.go +++ b/search_test.go @@ -10,18 +10,11 @@ import ( var searchScraper = twitterscraper.New() -func authSearchScraper() error { - if searchScraper.IsLoggedIn() { - return nil - } - return searchScraper.Login(username, password, email) -} - func TestFetchSearchCursor(t *testing.T) { if os.Getenv("SKIP_AUTH_TEST") != "" { t.Skip("Skipping test due to environment variable") } - err := authSearchScraper() + err := searchScraper.LoginOpenAccount() if err != nil { t.Fatal(err) } @@ -42,13 +35,10 @@ func TestFetchSearchCursor(t *testing.T) { } func TestGetSearchProfiles(t *testing.T) { - if os.Getenv("SKIP_AUTH_TEST") != "" { - t.Skip("Skipping test due to environment variable") - } count := 0 maxProfilesNbr := 150 dupcheck := make(map[string]bool) - err := authSearchScraper() + err := searchScraper.LoginOpenAccount() if err != nil { t.Fatal(err) } @@ -75,13 +65,10 @@ func TestGetSearchProfiles(t *testing.T) { } } func TestGetSearchTweets(t *testing.T) { - if os.Getenv("SKIP_AUTH_TEST") != "" { - t.Skip("Skipping test due to environment variable") - } count := 0 maxTweetsNbr := 150 dupcheck := make(map[string]bool) - err := authSearchScraper() + err := searchScraper.LoginOpenAccount() if err != nil { t.Fatal(err) } diff --git a/tweets.go b/tweets.go index 7eee270..45005e2 100644 --- a/tweets.go +++ b/tweets.go @@ -52,7 +52,7 @@ func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor st // GetTweet get a single tweet by ID. func (s *Scraper) GetTweet(id string) (*Tweet, error) { - req, err := s.newRequest("GET", "https://twitter.com/i/api/2/timeline/conversation/"+id+".json") + req, err := s.newRequest("GET", "https://api.twitter.com/2/timeline/conversation/"+id+".json") if err != nil { return nil, err }