Skip to content

Commit

Permalink
Search rate limits (#33)
Browse files Browse the repository at this point in the history
* Apply rate limits to search

Due to excessive spamming, we are applying rate limits to the search endpoints.
This means, for instance, that a user who requests the same endpoint more than 500 times in one day
will get a 429 response (the limit is 500/day). The limit is applied globally to all search endpoints,
because otherwise the same IP hitting different endpoints could still
consume the Search API quota and thus take down search.

In terms of rate limit strategy, fixed window was chosen mainly because
it consumes the least memory. Other strategies may be needed if this is
not effective.

At the moment, we don't have storage infrastructure, so we are using
in-memory storage to track requests. Ideally, a backend storage
service (Redis, Memcached) would be used instead.
  • Loading branch information
carkod authored Feb 21, 2023
1 parent f9aece5 commit 41670f7
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 14 deletions.
13 changes: 5 additions & 8 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install system dependencies
run: |
Expand All @@ -23,7 +23,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install system dependencies
run: |
Expand All @@ -37,15 +37,12 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install system dependencies
- name: Test Python
run: |
pip3 install --upgrade setuptools pip
pip3 install wheel
python3 setup.py install --user test
- name: Test Python
run: python3 setup.py test
check-inclusive-naming:
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ v1.2.4, 2022-07-09 -- Remove blocklist checks; move blocking logic to core searc
v1.2.5, 2022-07-12 -- Block some more bot useragents
v1.2.6, 2022-07-13 -- Block one more useragent - "gh"
v1.2.7, 2022-07-15 -- Block more user agents - "Petalbot"
v1.3.0, 2023-02-20 -- Add rate limits
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ app.add_url_rule(
build_search_view(
session=session,
site="maas.io/docs",
template_path="docs/search.html"
search_engine_id="xxxxxxxxxx" # Optional argument, required by some of our sites
template_path="docs/search.html",
search_engine_id="xxxxxxxxxx", # Optional argument, required by some of our sites
request_limit="500/day", # Allows you to configure the rate limit above which further search requests are rejected with HTTP 429. Defaults to 500 per day
)
)
```
Expand Down
6 changes: 5 additions & 1 deletion canonicalwebteam/search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,11 @@ def get_search_results(
"ALittle Client",
"gh",
)
bot_contains = ("HeadlessChrome/", "Assetnote/", "PetalBot")
bot_contains = (
"HeadlessChrome/",
"Assetnote/",
"PetalBot",
)
agent = user_agents.parse(str(flask.request.user_agent))
if (
agent.is_bot
Expand Down
12 changes: 12 additions & 0 deletions canonicalwebteam/search/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

# Local
from canonicalwebteam.search.models import get_search_results
from limits import storage, strategies, parse

# Request counters are kept in this process's memory (no external storage
# backend is available yet), so they are presumably not shared across
# processes and are lost on restart.
memory_storage = storage.MemoryStorage()
# Fixed window strategy: chosen because it consumes the least memory.
# The previous code instantiated MovingWindowRateLimiter here, which
# contradicted both this variable's name and the stated intent.
fixed_window = strategies.FixedWindowRateLimiter(memory_storage)


class NoAPIKeyError(Exception):
Expand All @@ -18,6 +22,7 @@ def build_search_view(
template_path="search.html",
search_engine_id="009048213575199080868:i3zoqdwqk8o",
site_restricted_search=False,
request_limit="500/day",
):
"""
Build and return a view function that will query the
Expand Down Expand Up @@ -45,6 +50,13 @@ def search_view():
"""
Get search results from Google Custom Search
"""
# Rate limit requests to protect from spamming
# To adjust this rate visit
# https://limits.readthedocs.io/en/latest/quickstart.html#examples
limit = parse(request_limit)
rate_limit = fixed_window.hit(limit)
if not rate_limit:
return flask.abort(429, f"The rate limit is: {request_limit}")

# API key should always be provided as an environment variable
search_api_key = os.getenv("SEARCH_API_KEY")
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@

setup(
name="canonicalwebteam.search",
version="1.2.7",
version="1.3.0",
author="Canonical webteam",
author_email="webteam@canonical.com",
url="https://github.com/canonical-web-and-design/canonicalwebteam.search",
url="https://github.com/canonical/canonicalwebteam.search",
description=(
"Flask extension to provide a search view for querying the webteam's "
"Google Custom Search account"
),
packages=find_packages(),
long_description=open("README.md").read(),
long_description_content_type="text/markdown",
install_requires=["Flask>=1.0.2", "user-agents>=2.0.0"],
install_requires=["Flask>=1.0.2", "user-agents>=2.0.0", "limits>=3.2.0"],
tests_require=["httpretty"],
)
22 changes: 22 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ def setUp(self):
),
)

# Rate limits
self.app.add_url_rule(
"/server/docs/limited/search",
"server-docs-search-limited",
build_search_view(
session=session,
template_path="docs/search.html",
site_restricted_search=True,
request_limit="0/second",
),
)

self.client = self.app.test_client()

def tearDown(self):
Expand Down Expand Up @@ -246,3 +258,13 @@ def test_site_restricted_search(self):
),
search_response.data,
)

def test_rate_limit(self):
    """
    The rate-limited search route is registered with
    request_limit="0/second", so even the first request
    must be rejected with HTTP 429 (Too Many Requests)
    """

    limited_url = "/server/docs/limited/search?q=packer&start=20&num=3"
    response = self.client.get(limited_url)

    self.assertEqual(response.status_code, 429)

0 comments on commit 41670f7

Please sign in to comment.