Skip to content

Commit

Permalink
--max-results
Browse files Browse the repository at this point in the history
This commit adds the --max-results option to search, which controls
the max_results parameter that is sent to the search/recent and
search/all endpoints. Ordinarily everyone would want to request the
maximum number of tweets per request, so as to get the most out of the
15-minute request quota. But occasional 503 errors have been observed
coming from the search/all endpoint, and a forum post has suggested that
dialing max_results down to 100 can help.

Refs #449
  • Loading branch information
edsu committed Apr 29, 2021
1 parent 7beeb46 commit 350852d
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions twarc/command2.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,31 +157,39 @@ def get_version():
@click.option('--archive', is_flag=True, default=False,
help='Search the full archive (requires Academic Research track)')
@click.option('--limit', default=0, help='Maximum number of tweets to save')
@click.option('--max-results', default=0, help='Maximum number of tweets per API response')
@click.option('--flatten', is_flag=True, default=False,
help='Include expansions inline with tweets, and one line per tweet')
@click.argument('query', type=str)
@click.argument('outfile', type=click.File('w'), default='-')
@click.pass_obj
@cli_api_error
def search(T, query, outfile, since_id, until_id, start_time, end_time, limit, archive, flatten):
def search(T, query, outfile, since_id, until_id, start_time, end_time, limit,
max_results, archive, flatten):
"""
Search for tweets.
"""
count = 0

if archive:
search_method = T.search_all

# the default number of tweets per response is 500 when not set otherwise
if max_results == 0:
max_results = 500

# if the user is searching the historical archive the assumption is that
# they want to search everything, and not just the previous month which
# is the default: https://github.com/DocNow/twarc/issues/434
if start_time == None:
start_time = datetime.datetime(2006, 3, 21, 0, 0, 0, 0,
datetime.timezone.utc)
start_time = datetime.datetime(2006, 3, 21, tzinfo=datetime.timezone.utc)
else:
if max_results == 0:
max_results = 100
search_method = T.search_recent


for result in search_method(query, since_id, until_id, start_time, end_time):
for result in search_method(query, since_id, until_id, start_time, end_time,
max_results):
_write(result, outfile, flatten)
count += len(result['data'])
if limit != 0 and count >= limit:
Expand Down

0 comments on commit 350852d

Please sign in to comment.