Skip to content

Commit

Permalink
Adds language parameter to twitter filter collection. Fixes #943
Browse files Browse the repository at this point in the history
  • Loading branch information
kerchner committed Jul 30, 2019
1 parent 17a2332 commit 00c9f93
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
12 changes: 9 additions & 3 deletions docs/collections.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,14 @@ Twitter filter
---------------

Twitter Filter collections harvest a live selection of public tweets from
criteria matching keywords, locations, or users, based on the
criteria matching keywords, locations, languages, or users, based on the
`Twitter filter streaming API
<https://developer.twitter.com/en/docs/tweets/filter-realtime/overview/statuses-filter>`_. Because
tweets are collected live, tweets from the past are not included. (Use a
:ref:`Twitter search` collection to find tweets from the recent past.)

There are three different filter queries supported by SFM: track, follow, and
location.
There are four different filter queries supported by SFM: track, follow,
location, and language.

**Track** collects tweets based on a keyword search. A space between words
is treated as 'AND' and a comma is treated as 'OR'. Note that exact phrase
Expand All @@ -145,6 +145,12 @@ coordinates. See the `location parameter documentation
<https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters#locations>`_ for
more information.

**Language** collects tweets that Twitter detected as being written in the specified languages.
For example, specifying ``en,es`` will only collect tweets detected to be in English or Spanish.
See the `language parameter documentation
<https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters#language>`_ for
more information.

Twitter will return a limited number of tweets, so filters that return many
results will not return all available tweets. Therefore, more narrow filters
will usually return more complete results.
Expand Down
24 changes: 20 additions & 4 deletions sfm/ui/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,15 +566,21 @@ class SeedTwitterFilterForm(BaseSeedForm):
follow</a>
documentation for a full list of what is returned. User <a target="_blank"
href="https://tweeterid.com/">TweeterID</a> to get the user ID for a screen name.""")
locations = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 4}),
locations = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 2}),
help_text="""Provide a longitude and latitude (e.g. -74,40,-73,41) of a geographic
bounding box. See Twitter <a target="blank"
href="https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters#locations">
locations</a> for more information.""")

# Optional free-text field holding a comma-separated list of language codes.
# The help-text links use target="_blank" (the reserved keyword, as in the
# follow help text) so each link opens in a new tab; the bare name "blank"
# would instead send every link to one shared window named "blank".
language = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 2}),
                           help_text="""Provide a comma-separated list of two-letter <a target="_blank"
href="http://tools.ietf.org/html/bcp47">BCP47</a> language codes (e.g. en,es). See Twitter <a target="_blank"
href="https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters#language">
language</a> for more information.""")

def __init__(self, *args, **kwargs):
super(SeedTwitterFilterForm, self).__init__(*args, **kwargs)
self.helper.layout[0][0].extend(('track', 'follow', 'locations'))
self.helper.layout[0][0].extend(('track', 'follow', 'locations', 'language'))

if self.instance and self.instance.token:
token = json.loads(self.instance.token)
Expand All @@ -584,6 +590,8 @@ def __init__(self, *args, **kwargs):
self.fields['follow'].initial = token['follow']
if 'locations' in token:
self.fields['locations'].initial = token['locations']
# Restore the saved language filter into the form field. The lookup key must
# match the 'language' key written into the token by clean() and save().
if 'language' in token:
    self.fields['language'].initial = token['language']

def clean_track(self):
track_val = self.cleaned_data.get("track").strip()
Expand All @@ -594,6 +602,9 @@ def clean_track(self):
def clean_locations(self):
    """Return the ``locations`` value with leading/trailing whitespace removed."""
    raw_locations = self.cleaned_data.get("locations")
    return raw_locations.strip()

def clean_language(self):
    """Return the ``language`` value with leading/trailing whitespace removed."""
    language_codes = self.cleaned_data.get("language")
    return language_codes.strip()

def clean_follow(self):
follow_val = self.cleaned_data.get("follow").strip()
if len(follow_val.split(",")) > 5000:
Expand All @@ -605,10 +616,11 @@ def clean(self):
track_val = self.cleaned_data.get("track")
follow_val = self.cleaned_data.get("follow")
locations_val = self.cleaned_data.get("locations")
language_val = self.cleaned_data.get("language")

# should not all be empty
if not track_val and not follow_val and not locations_val:
raise ValidationError(u'One of the following fields is required: track, follow, locations.')
if not track_val and not follow_val and not locations_val and not language_val:
raise ValidationError(u'One of the following fields is required: track, follow, locations, language.')

# check follow should be number uid
if re.compile(r'[^0-9, ]').search(follow_val):
Expand All @@ -621,6 +633,8 @@ def clean(self):
token_val['follow'] = follow_val
if locations_val:
token_val['locations'] = locations_val
if language_val:
token_val['language'] = language_val
token_val = json.dumps(token_val, ensure_ascii=False)
# for the update view
if self.view_type == Seed.UPDATE_VIEW:
Expand All @@ -643,6 +657,8 @@ def save(self, commit=True):
token['follow'] = self.cleaned_data['follow']
if self.cleaned_data['locations']:
token['locations'] = self.cleaned_data['locations']
if self.cleaned_data['language']:
token['language'] = self.cleaned_data['language']
m.token = json.dumps(token, ensure_ascii=False)
m.save()
return m
Expand Down

0 comments on commit 00c9f93

Please sign in to comment.