Skip to content

Commit

Permalink
Merge pull request #645 from mirkolenz/feature/search-sort-order
Browse files Browse the repository at this point in the history
Add sort_order parameter for search api
  • Loading branch information
igorbrigadir authored Jun 24, 2022
2 parents 36c2252 + c4f7fc9 commit 4b72913
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 3 deletions.
7 changes: 7 additions & 0 deletions docs/twarc2_en_us.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ leave off the `--start-time`:

twarc2 search --end-time 2014-07-24 '"eric garner"' tweets.jsonl

### Sort Order

By default, Twitter returns results ordered by publication date, with the newest tweets first.
To change this behavior, specify the `--sort-order` parameter.
Currently, it supports `recency` (the default) and `relevancy`.
In the latter case, tweets are ordered based on what Twitter determines to be the best results for your query.

## Searches

Searches works like the [search](#search) command, but instead of taking a single query, it reads from a file containing many queries. You can use the same limit and time options as with a single search command, but they will be applied to every query.
Expand Down
5 changes: 3 additions & 2 deletions test_twarc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,13 @@ def test_sample():
assert count == 11


def test_search_recent():
@pytest.mark.parametrize("sort_order", ["recency", "relevancy"])
def test_search_recent(sort_order):

found_tweets = 0
pages = 0

for response_page in T.search_recent("#auspol"):
for response_page in T.search_recent("#auspol", sort_order=sort_order):
pages += 1
tweets = response_page["data"]
found_tweets += len(tweets)
Expand Down
14 changes: 13 additions & 1 deletion twarc/client2.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def _prepare_params(self, **kwargs):
)

# Any other parameters passed as is,
# these include backfill_minutes, next_token, pagination_token
# these include backfill_minutes, next_token, pagination_token, sort_order
params = {**params, **{k: v for k, v in kwargs.items() if v is not None}}

return params
Expand All @@ -201,6 +201,7 @@ def _search(
media_fields,
poll_fields,
place_fields,
sort_order,
next_token=None,
granularity=None,
sleep_between=0,
Expand All @@ -217,6 +218,7 @@ def _search(
start_time=start_time,
end_time=end_time,
next_token=next_token,
sort_order=sort_order
)

if granularity:
Expand Down Expand Up @@ -657,6 +659,7 @@ def search_recent(
poll_fields=None,
place_fields=None,
next_token=None,
sort_order=None,
):
"""
Search Twitter for the given query in the last seven days,
Expand All @@ -677,6 +680,8 @@ def search_recent(
Return all tweets before this time (UTC datetime).
max_results (int):
The maximum number of results per request. Max is 100.
sort_order (str):
Order tweets based on relevancy or recency.
Returns:
generator[dict]: a generator, dict for each paginated response.
Expand All @@ -696,6 +701,7 @@ def search_recent(
poll_fields=poll_fields,
place_fields=place_fields,
next_token=next_token,
sort_order=sort_order,
)

@requires_app_auth
Expand All @@ -714,6 +720,7 @@ def search_all(
poll_fields=None,
place_fields=None,
next_token=None,
sort_order=None,
):
"""
Search Twitter for the given query in the full archive,
Expand All @@ -735,6 +742,8 @@ def search_all(
Return all tweets before this time (UTC datetime).
max_results (int):
The maximum number of results per request. Max is 500.
sort_order (str):
Order tweets based on relevancy or recency.
Returns:
generator[dict]: a generator, dict for each paginated response.
Expand Down Expand Up @@ -762,6 +771,7 @@ def search_all(
place_fields=place_fields,
next_token=next_token,
sleep_between=1.05,
sort_order=sort_order,
)

@requires_app_auth
Expand Down Expand Up @@ -813,6 +823,7 @@ def counts_recent(
poll_fields=None,
place_fields=None,
granularity=granularity,
sort_order=None,
)

@requires_app_auth
Expand Down Expand Up @@ -867,6 +878,7 @@ def counts_all(
next_token=next_token,
granularity=granularity,
sleep_between=1.05,
sort_order=None,
)

def tweet_lookup(
Expand Down
41 changes: 41 additions & 0 deletions twarc/command2.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ def _search(
media_fields,
poll_fields,
place_fields,
sort_order,
):
"""
Common function to Search for tweets.
Expand Down Expand Up @@ -281,6 +282,7 @@ def _search(
media_fields=media_fields,
poll_fields=poll_fields,
place_fields=place_fields,
sort_order=sort_order,
):
_write(result, outfile)
tweet_ids = [t["id"] for t in result.get("data", [])]
Expand Down Expand Up @@ -617,6 +619,11 @@ def command_line_verbose_options(f):


@twarc2.command("search")
@click.option(
"--sort-order",
type=click.Choice(["recency", "relevancy"]),
help='Filter tweets based on their date ("recency") (default) or based on their relevance as indicated by Twitter ("relevancy")'
)
@command_line_search_options
@command_line_search_archive_options
@command_line_expansions_shortcuts
Expand Down Expand Up @@ -1290,6 +1297,11 @@ def mentions(T, user_id, outfile, hide_progress, **kwargs):
@command_line_expansions_options
@command_line_progressbar_option
@click.option("--limit", default=0, help="Maximum number of tweets to return")
@click.option(
"--sort-order",
type=click.Choice(["recency", "relevancy"]),
help='Filter tweets based on their date ("recency") (default) or based on their relevance as indicated by Twitter ("relevancy")'
)
@click.argument("user_id", type=str)
@click.argument("outfile", type=click.File("w"), default="-")
@click.pass_obj
Expand All @@ -1307,6 +1319,7 @@ def timeline(
exclude_retweets,
exclude_replies,
hide_progress,
sort_order,
**kwargs,
):
"""
Expand Down Expand Up @@ -1363,6 +1376,7 @@ def timeline(
end_time=end_time,
exclude_retweets=exclude_retweets,
exclude_replies=exclude_replies,
sort_order=sort_order,
**kwargs,
)

Expand Down Expand Up @@ -1394,6 +1408,11 @@ def timeline(
default=0,
help="Maximum number of tweets to return per-timeline",
)
@click.option(
"--sort-order",
type=click.Choice(["recency", "relevancy"]),
help='Filter tweets based on their date ("recency") (default) or based on their relevance as indicated by Twitter ("relevancy")'
)
@command_line_search_options
@command_line_timelines_options
@command_line_expansions_shortcuts
Expand All @@ -1408,6 +1427,7 @@ def timelines(
limit,
timeline_limit,
use_search,
sort_order,
hide_progress,
**kwargs,
):
Expand Down Expand Up @@ -1489,6 +1509,7 @@ def timelines(
tweets = _timeline_tweets(
T,
use_search=use_search,
sort_order=sort_order,
user_id=user,
**kwargs,
)
Expand Down Expand Up @@ -1516,6 +1537,7 @@ def _timeline_tweets(
end_time,
exclude_retweets,
exclude_replies,
sort_order,
**kwargs,
):
if use_search:
Expand All @@ -1530,6 +1552,7 @@ def _timeline_tweets(
until_id=until_id,
start_time=start_time,
end_time=end_time,
sort_order=sort_order,
**kwargs,
)
else:
Expand All @@ -1549,6 +1572,11 @@ def _timeline_tweets(
@twarc2.command("searches")
@command_line_search_options
@command_line_search_archive_options
@click.option(
"--sort-order",
type=click.Choice(["recency", "relevancy"]),
help='Filter tweets based on their date ("recency") (default) or based on their relevance as indicated by Twitter ("relevancy")'
)
@click.option(
"--counts-only",
is_flag=True,
Expand Down Expand Up @@ -1591,6 +1619,7 @@ def searches(
granularity,
combine_queries,
hide_progress,
sort_order,
**kwargs,
):
"""
Expand Down Expand Up @@ -1641,6 +1670,7 @@ def searches(
kwargs.pop("media_fields", None)
kwargs.pop("poll_fields", None)
kwargs.pop("place_fields", None)
kwargs.pop("sort_order", None)
kwargs = {
**kwargs,
**{
Expand All @@ -1665,6 +1695,7 @@ def searches(
"start_time": start_time,
"end_time": end_time,
"max_results": max_results,
"sort_order": sort_order,
},
}

Expand Down Expand Up @@ -1768,6 +1799,11 @@ def searches(


@twarc2.command("conversation")
@click.option(
"--sort-order",
type=click.Choice(["recency", "relevancy"]),
help='Filter tweets based on their date ("recency") (default) or based on their relevance as indicated by Twitter ("relevancy")'
)
@command_line_search_options
@command_line_search_archive_options
@command_line_expansions_shortcuts
Expand Down Expand Up @@ -1804,6 +1840,11 @@ def conversation(
default=0,
help="Maximum number of tweets to return per-conversation",
)
@click.option(
"--sort-order",
type=click.Choice(["recency", "relevancy"]),
help='Filter tweets based on their date ("recency") (default) or based on their relevance as indicated by Twitter ("relevancy")'
)
@command_line_search_options
@command_line_search_archive_options
@command_line_expansions_shortcuts
Expand Down

0 comments on commit 4b72913

Please sign in to comment.