From d59ec9529c60facc3c90ff3a81c99e3c0b8c6ba7 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Sun, 18 Jun 2023 19:13:26 +0200 Subject: [PATCH] Parse GIFs in the GetTweet API I am writing an app that needs to get info about all media in a tweet and forward it to a Telegram chat. Today animated GIFs are ignored in the response of TweetDetail, although they are there (except for the caveat mentioned below). So without this change the GIFs are not present in the twitterscraper.Tweet struct. Following the analogy with the split between Photos and Videos I added GIFs to the Tweet type. There is one caveat that I found during testing that I can't really explain. But GIFs don't occur in the response unless the bearerToken2 is set. I don't know what this token means, maybe it somehow identifies a desktop-browser variant of the Twitter frontend, but with this token the GIFs are present in the response. Please note that I never wrote Go code before in my life. I am using this library via the FFI to link it to my Rust codebase. --- timeline_v2.go | 27 +++++++++++++++ tweets.go | 36 ++++++++++++-------- tweets_test.go | 90 ++++++++++++++++++++++++++++++++++++++++++++------ types.go | 8 +++++ 4 files changed, 138 insertions(+), 23 deletions(-) diff --git a/timeline_v2.go b/timeline_v2.go index c3d031f..020177e 100644 --- a/timeline_v2.go +++ b/timeline_v2.go @@ -261,6 +261,26 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { } tw.Videos = append(tw.Videos, video) + } else if media.Type == "animated_gif" { + gif := GIF{ + ID: media.IDStr, + Preview: media.MediaURLHttps, + } + + // Twitter's API doesn't provide bitrate for GIFs, (it's always set to zero). + // Therefore we check for `>=` instead of `>` in the loop below. + // Also, GIFs have just a single variant today. Just in case that changes in the future, + // and there will be multiple variants, we'll pick the one with the highest bitrate, + // if other one will have a non-zero bitrate. 
+ maxBitrate := 0 + for _, variant := range media.VideoInfo.Variants { + if variant.Bitrate >= maxBitrate { + gif.URL = variant.URL + maxBitrate = variant.Bitrate + } + } + + tw.GIFs = append(tw.GIFs, gif) } if !tw.SensitiveContent { @@ -315,6 +335,13 @@ func parseLegacyTweet(user *legacyUser, tweet *legacyTweet) *Tweet { } tw.HTML += fmt.Sprintf(`
`, url) } + for _, gif := range tw.GIFs { + url := gif.Preview + if stringInSlice(url, foundedMedia) { + continue + } + tw.HTML += fmt.Sprintf(`
`, url) + } tw.HTML = strings.Replace(tw.HTML, "\n", "
", -1) return tw } diff --git a/tweets.go b/tweets.go index f436366..5a44f43 100644 --- a/tweets.go +++ b/tweets.go @@ -85,14 +85,13 @@ func (s *Scraper) FetchTweetsByUserID(userID string, maxTweetsNbr int, cursor st // GetTweet get a single tweet by ID. func (s *Scraper) GetTweet(id string) (*Tweet, error) { - req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/wETHelmSuBQR5r-dgUlPxg/TweetDetail") + req, err := s.newRequest("GET", "https://twitter.com/i/api/graphql/VWFGPVAGkZMGRKGe3GFFnA/TweetDetail") if err != nil { return nil, err } variables := map[string]interface{}{ "focalTweetId": id, - "referrer": "profile", "with_rux_injections": false, "includePromotedContent": true, "withCommunity": true, @@ -103,14 +102,13 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) { } features := map[string]interface{}{ - "rweb_lists_timeline_redesign_enabled": true, - "responsive_web_graphql_exclude_directive_enabled": true, - "verified_phone_label_enabled": false, - "creator_subscriptions_tweet_preview_api_enabled": true, - "responsive_web_graphql_timeline_navigation_enabled": true, - "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, - "tweetypie_unmention_optimization_enabled": true, - "vibe_api_enabled": true, + "rweb_lists_timeline_redesign_enabled": true, + "responsive_web_graphql_exclude_directive_enabled": true, + "verified_phone_label_enabled": false, + "creator_subscriptions_tweet_preview_api_enabled": true, + "responsive_web_graphql_timeline_navigation_enabled": true, + "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, + "tweetypie_unmention_optimization_enabled": true, "responsive_web_edit_tweet_api_enabled": true, "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, "view_counts_everywhere_api_enabled": true, @@ -119,10 +117,8 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) { "freedom_of_speech_not_reach_fetch_enabled": true, "standardized_nudges_misinfo": true, 
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false, - "interactive_text_enabled": true, - "responsive_web_text_conversations_enabled": false, "longform_notetweets_rich_text_read_enabled": true, - "longform_notetweets_inline_media_enabled": false, + "longform_notetweets_inline_media_enabled": true, "responsive_web_enhance_cards_enabled": false, } @@ -132,7 +128,21 @@ func (s *Scraper) GetTweet(id string) (*Tweet, error) { req.URL.RawQuery = query.Encode() var conversation threadedConversation + + // Surprisingly, if bearerToken2 is not set, then animated GIFs are not + // present in the response for tweets with a GIF + a photo like this one: + // https://twitter.com/Twitter/status/1580661436132757506 + curBearerToken := s.bearerToken + if curBearerToken != bearerToken2 { + s.setBearerToken(bearerToken2) + } + err = s.RequestAPI(req, &conversation) + + if curBearerToken != bearerToken2 { + s.setBearerToken(curBearerToken) + } + if err != nil { return nil, err } diff --git a/tweets_test.go b/tweets_test.go index 25c5b21..eea31ad 100644 --- a/tweets_test.go +++ b/tweets_test.go @@ -71,8 +71,18 @@ func TestGetTweets(t *testing.T) { } } -func TestGetTweet(t *testing.T) { - sample := twitterscraper.Tweet{ +func assertGetTweet(t *testing.T, expectedTweet *twitterscraper.Tweet) { + scraper := twitterscraper.New() + actualTweet, err := scraper.GetTweet(expectedTweet.ID) + if err != nil { + t.Error(err) + } else if diff := cmp.Diff(expectedTweet, actualTweet, cmpOptions...); diff != "" { + t.Error("Resulting tweet does not match the sample", diff) + } +} + +func TestGetTweetWithVideo(t *testing.T) { + expectedTweet := twitterscraper.Tweet{ ConversationID: "1328684389388185600", HTML: "That thing you didn’t Tweet but wanted to but didn’t but got so close but then were like nah.

We have a place for that now—Fleets!

Rolling out to everyone starting today.
", ID: "1328684389388185600", @@ -90,15 +100,75 @@ func TestGetTweet(t *testing.T) { URL: "https://video.twimg.com/amplify_video/1328684333599756289/vid/960x720/PcL8yv8KhgQ48Qpt.mp4?tag=13", }}, } - scraper := twitterscraper.New() - tweet, err := scraper.GetTweet("1328684389388185600") - if err != nil { - t.Error(err) - } else { - if diff := cmp.Diff(sample, *tweet, cmpOptions...); diff != "" { - t.Error("Resulting tweet does not match the sample", diff) - } + assertGetTweet(t, &expectedTweet) +} + +func TestGetTweetWithMultiplePhotos(t *testing.T) { + expectedTweet := twitterscraper.Tweet{ + ConversationID: "1390026628957417473", + HTML: `no bird too tall, no crop too short

introducing bigger and better images on iOS and Android, now available to everyone

`, + ID: "1390026628957417473", + Name: "Twitter", + PermanentURL: "https://twitter.com/Twitter/status/1390026628957417473", + Photos: []twitterscraper.Photo{ + {ID: "1390026620472332292", URL: "https://pbs.twimg.com/media/E0pd2L2XEAQ_gnn.jpg"}, + {ID: "1390026626214371334", URL: "https://pbs.twimg.com/media/E0pd2hPXoAY9-TZ.jpg"}, + }, + Text: "no bird too tall, no crop too short\n\nintroducing bigger and better images on iOS and Android, now available to everyone https://t.co/2buHfhfRAx", + TimeParsed: time.Date(2021, 5, 5, 19, 32, 28, 0, time.FixedZone("UTC", 0)), + Timestamp: 1620243148, + UserID: "783214", + Username: "Twitter", + } + assertGetTweet(t, &expectedTweet) +} + +func TestGetTweetWithGIF(t *testing.T) { + expectedTweet := twitterscraper.Tweet{ + ConversationID: "1288540609310056450", + GIFs: []twitterscraper.GIF{ + { + ID: "1288540582768517123", + Preview: "https://pbs.twimg.com/tweet_video_thumb/EeHQ1UKXoAMVxWB.jpg", + URL: "https://video.twimg.com/tweet_video/EeHQ1UKXoAMVxWB.mp4", + }, + }, + Hashtags: []string{"CountdownToMars"}, + HTML: `Like for liftoff! #CountdownToMars
`, + ID: "1288540609310056450", + Name: "Twitter", + PermanentURL: "https://twitter.com/Twitter/status/1288540609310056450", + Text: "Like for liftoff! #CountdownToMars https://t.co/yLe331pHfY", + TimeParsed: time.Date(2020, 7, 29, 18, 23, 15, 0, time.FixedZone("UTC", 0)), + Timestamp: 1596046995, + UserID: "783214", + Username: "Twitter", + } + assertGetTweet(t, &expectedTweet) +} + +func TestGetTweetWithPhotoAndGIF(t *testing.T) { + expectedTweet := twitterscraper.Tweet{ + ConversationID: "1580661436132757506", + GIFs: []twitterscraper.GIF{ + { + ID: "1580661428335382531", + Preview: "https://pbs.twimg.com/tweet_video_thumb/Fe-jMcIXkAMXK_W.jpg", + URL: "https://video.twimg.com/tweet_video/Fe-jMcIXkAMXK_W.mp4", + }, + }, + HTML: `a hit Tweet

`, + ID: "1580661436132757506", + Name: "Twitter", + PermanentURL: "https://twitter.com/Twitter/status/1580661436132757506", + Photos: []twitterscraper.Photo{{ID: "1580661428326907904", URL: "https://pbs.twimg.com/media/Fe-jMcGWQAAFWoG.jpg"}}, + Text: "a hit Tweet https://t.co/2C7cah4KzW", + TimeParsed: time.Date(2022, 10, 13, 20, 47, 8, 0, time.FixedZone("UTC", 0)), + Timestamp: 1665694028, + UserID: "783214", + Username: "Twitter", } + assertGetTweet(t, &expectedTweet) } func TestTweetMentions(t *testing.T) { diff --git a/types.go b/types.go index 0528aa5..c1ea306 100644 --- a/types.go +++ b/types.go @@ -23,9 +23,17 @@ type ( URL string } + // GIF type. + GIF struct { + ID string + Preview string + URL string + } + // Tweet type. Tweet struct { ConversationID string + GIFs []GIF Hashtags []string HTML string ID string