Adds language parameter to twitter filter collection. Fixes #943

gwu-libraries · Jul 30, 2019 · 00c9f93 · 00c9f93
1 parent 17a2332
commit 00c9f93
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 7 deletions.
diff --git a/docs/collections.rst b/docs/collections.rst
@@ -112,14 +112,14 @@ Twitter filter
 ---------------
 
 Twitter Filter collections harvest a live selection of public tweets from
-criteria matching keywords, locations, or users, based on the
+criteria matching keywords, locations, languages, or users, based on the
 `Twitter filter streaming API
 <https://developer.twitter.com/en/docs/tweets/filter-realtime/overview/statuses-filter>`_. Because
 tweets are collected live, tweets from the past are not included. (Use a
 :ref:`Twitter search` collection to find tweets from the recent past.)
 
-There are three different filter queries supported by SFM: track, follow, and
-location.
+There are four different filter queries supported by SFM: track, follow, 
+location, and language.
 
 **Track** collects tweets based on a keyword search. A space between words
 is treated as 'AND' and a comma is treated as 'OR'. Note that exact phrase
@@ -145,6 +145,12 @@ coordinates. See the `location parameter documentation
 <https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters#locations>`_ for
 more information.
 
+**Language** collects tweets that Twitter detected as being written in the specified languages.
+For example, specifying ``en,es`` will collect only tweets detected to be in English or Spanish.
+See the `language parameter documentation
+<https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters#language>`_ for
+more information.
+
 Twitter will return a limited number of tweets, so filters that return many
 results will not return all available tweets. Therefore, more narrow filters
 will usually return more complete results.

diff --git a/sfm/ui/forms.py b/sfm/ui/forms.py
@@ -566,15 +566,21 @@ class SeedTwitterFilterForm(BaseSeedForm):
                              follow</a>
                              documentation for a full list of what is returned. User <a target="_blank"
                              href="https://tweeterid.com/">TweeterID</a> to get the user ID for a screen name.""")
-    locations = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 4}),
+    locations = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 2}),
                                 help_text="""Provide a longitude and latitude (e.g. -74,40,-73,41) of a geographic
                                 bounding box. See Twitter <a target="blank"
                                 href="https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters#locations">
                                 locations</a> for more information.""")
 
+    language = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 2}),
+                               help_text="""Provide a comma-separated list of two-letter <a target="blank"
+                               href="http://tools.ietf.org/html/bcp47">BCP47</a> language codes (e.g. en,es). See Twitter <a target="blank"
+                               href="https://developer.twitter.com/en/docs/tweets/filter-realtime/guides/basic-stream-parameters#language">
+                               language</a> for more information.""")
+
     def __init__(self, *args, **kwargs):
         super(SeedTwitterFilterForm, self).__init__(*args, **kwargs)
-        self.helper.layout[0][0].extend(('track', 'follow', 'locations'))
+        self.helper.layout[0][0].extend(('track', 'follow', 'locations', 'language'))
 
         if self.instance and self.instance.token:
             token = json.loads(self.instance.token)
@@ -584,6 +590,8 @@ def __init__(self, *args, **kwargs):
                 self.fields['follow'].initial = token['follow']
             if 'locations' in token:
                 self.fields['locations'].initial = token['locations']
+            if 'language' in token:  # key must match the 'language' key stored in the token by clean()/save()
+                self.fields['language'].initial = token['language']
 
     def clean_track(self):
         track_val = self.cleaned_data.get("track").strip()
@@ -594,6 +602,9 @@ def clean_track(self):
     def clean_locations(self):
         return self.cleaned_data.get("locations").strip()
 
+    def clean_language(self):
+        return self.cleaned_data.get("language").strip()
+
     def clean_follow(self):
         follow_val = self.cleaned_data.get("follow").strip()
         if len(follow_val.split(",")) > 5000:
@@ -605,10 +616,11 @@ def clean(self):
         track_val = self.cleaned_data.get("track")
         follow_val = self.cleaned_data.get("follow")
         locations_val = self.cleaned_data.get("locations")
+        language_val = self.cleaned_data.get("language")
 
         # should not all be empty
-        if not track_val and not follow_val and not locations_val:
-            raise ValidationError(u'One of the following fields is required: track, follow, locations.')
+        if not track_val and not follow_val and not locations_val and not language_val:
+            raise ValidationError(u'One of the following fields is required: track, follow, locations, language.')
 
         # check follow should be number uid
         if re.compile(r'[^0-9, ]').search(follow_val):
@@ -621,6 +633,8 @@ def clean(self):
             token_val['follow'] = follow_val
         if locations_val:
             token_val['locations'] = locations_val
+        if language_val:
+            token_val['language'] = language_val
         token_val = json.dumps(token_val, ensure_ascii=False)
         # for the update view
         if self.view_type == Seed.UPDATE_VIEW:
@@ -643,6 +657,8 @@ def save(self, commit=True):
             token['follow'] = self.cleaned_data['follow']
         if self.cleaned_data['locations']:
             token['locations'] = self.cleaned_data['locations']
+        if self.cleaned_data['language']:
+            token['language'] = self.cleaned_data['language']
         m.token = json.dumps(token, ensure_ascii=False)
         m.save()
         return m