diff --git a/docs/collections.rst b/docs/collections.rst index 616439a0..68163e02 100644 --- a/docs/collections.rst +++ b/docs/collections.rst @@ -112,14 +112,14 @@ Twitter filter --------------- Twitter Filter collections harvest a live selection of public tweets from -criteria matching keywords, locations, or users, based on the +criteria matching keywords, locations, languages, or users, based on the `Twitter filter streaming API `_. Because tweets are collected live, tweets from the past are not included. (Use a :ref:`Twitter search` collection to find tweets from the recent past.) -There are three different filter queries supported by SFM: track, follow, and -location. +There are four different filter queries supported by SFM: track, follow, +location, and language. **Track** collects tweets based on a keyword search. A space between words is treated as 'AND' and a comma is treated as 'OR'. Note that exact phrase @@ -145,6 +145,12 @@ coordinates. See the `location parameter documentation `_ for more information. +**Language** collects tweets that Twitter detected as being written in the specified languages. +For example, specifying `en,es` will only collect Tweets detected to be in the English or Spanish languages. +See the `language parameter documentation +`_ for +more information. + Twitter will return a limited number of tweets, so filters that return many results will not return all available tweets. Therefore, more narrow filters will usually return more complete results. diff --git a/sfm/ui/forms.py b/sfm/ui/forms.py index 5217dabb..c7bd4c7d 100644 --- a/sfm/ui/forms.py +++ b/sfm/ui/forms.py @@ -566,15 +566,21 @@ class SeedTwitterFilterForm(BaseSeedForm): follow documentation for a full list of what is returned. 
User TweeterID to get the user ID for a screen name.""") - locations = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 4}), + locations = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 2}), help_text="""Provide a longitude and latitude (e.g. -74,40,-73,41) of a geographic bounding box. See Twitter locations for more information.""") + language = forms.CharField(required=False, widget=forms.Textarea(attrs={'rows': 2}), + help_text="""Provide a comma-separated list of two-letter BCP47 language codes (e.g. en,es). See Twitter + language for more information.""") + def __init__(self, *args, **kwargs): super(SeedTwitterFilterForm, self).__init__(*args, **kwargs) - self.helper.layout[0][0].extend(('track', 'follow', 'locations')) + self.helper.layout[0][0].extend(('track', 'follow', 'locations', 'language')) if self.instance and self.instance.token: token = json.loads(self.instance.token) @@ -584,6 +590,8 @@ def __init__(self, *args, **kwargs): self.fields['follow'].initial = token['follow'] if 'locations' in token: self.fields['locations'].initial = token['locations'] + if 'language' in token: + self.fields['language'].initial = token['language'] def clean_track(self): track_val = self.cleaned_data.get("track").strip() @@ -594,6 +602,9 @@ def clean_track(self): def clean_locations(self): return self.cleaned_data.get("locations").strip() + def clean_language(self): + return self.cleaned_data.get("language").strip() + def clean_follow(self): follow_val = self.cleaned_data.get("follow").strip() if len(follow_val.split(",")) > 5000: @@ -605,10 +616,11 @@ def clean(self): track_val = self.cleaned_data.get("track") follow_val = self.cleaned_data.get("follow") locations_val = self.cleaned_data.get("locations") + language_val = self.cleaned_data.get("language") # should not all be empty - if not track_val and not follow_val and not locations_val: - raise ValidationError(u'One of the following fields is required: track, follow, 
locations.') + if not track_val and not follow_val and not locations_val and not language_val: + raise ValidationError(u'One of the following fields is required: track, follow, locations, language.') # check follow should be number uid if re.compile(r'[^0-9, ]').search(follow_val): @@ -621,6 +633,8 @@ def clean(self): token_val['follow'] = follow_val if locations_val: token_val['locations'] = locations_val + if language_val: + token_val['language'] = language_val token_val = json.dumps(token_val, ensure_ascii=False) # for the update view if self.view_type == Seed.UPDATE_VIEW: @@ -643,6 +657,8 @@ def save(self, commit=True): token['follow'] = self.cleaned_data['follow'] if self.cleaned_data['locations']: token['locations'] = self.cleaned_data['locations'] + if self.cleaned_data['language']: + token['language'] = self.cleaned_data['language'] m.token = json.dumps(token, ensure_ascii=False) m.save() return m