Skip to content

Commit

Permalink
Apply rate limits to search
Browse files Browse the repository at this point in the history
Due to excessive spamming, we are applying rate limits to search endpoints.
For instance, with a limit of 200/minute, a user who requests the same endpoint more than 200 times in one minute
will get a 429 response. The limit is applied globally to all search endpoints,
because otherwise the same IP hitting different endpoints could still
consume the Search API quota and thus take down search.

In terms of rate limit strategy, fixed window was chosen mainly because
it consumes the least memory. Other strategies may be needed if this is
not effective.

At the moment, we don't have storage infrastructure, so we are using
in-memory storage to track requests. Ideally, a backend storage
service (Redis, Memcached) would be used instead.
  • Loading branch information
carkod committed Feb 20, 2023
1 parent 5da7c6a commit d99b0ae
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 14 deletions.
13 changes: 5 additions & 8 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install system dependencies
run: |
Expand All @@ -23,7 +23,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install system dependencies
run: |
Expand All @@ -37,15 +37,12 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: Install system dependencies
- name: Test Python
run: |
pip3 install --upgrade setuptools pip
pip3 install wheel
python3 setup.py install --user test
- name: Test Python
run: python3 -m unittest discover tests
check-inclusive-naming:
runs-on: ubuntu-latest
Expand Down
1 change: 0 additions & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,3 @@ v1.2.4, 2022-07-09 -- Remove blocklist checks; move blocking logic to core searc
v1.2.5, 2022-07-12 -- Block some more bot useragents
v1.2.6, 2022-07-13 -- Block one more useragent - "gh"
v1.2.7, 2022-07-15 -- Block more user agents - "Petalbot"
v1.2.8, 2023-02-02 -- Block more bots - "Googlebot and bingbot"
2 changes: 0 additions & 2 deletions canonicalwebteam/search/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ def get_search_results(
"HeadlessChrome/",
"Assetnote/",
"PetalBot",
"Googlebot/",
"bingbot/",
)
agent = user_agents.parse(str(flask.request.user_agent))
if (
Expand Down
12 changes: 12 additions & 0 deletions canonicalwebteam/search/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

# Local
from canonicalwebteam.search.models import get_search_results
from limits import storage, strategies, parse

# Request counters are tracked in memory because no backend storage
# service (e.g. Redis, Memcached) is available yet.
memory_storage = storage.MemoryStorage()
# Use the fixed window strategy, matching this variable's name and the
# stated design choice (lowest memory use). The previous code built a
# MovingWindowRateLimiter here, which contradicted both.
fixed_window = strategies.FixedWindowRateLimiter(memory_storage)


class NoAPIKeyError(Exception):
Expand All @@ -18,6 +22,7 @@ def build_search_view(
template_path="search.html",
search_engine_id="009048213575199080868:i3zoqdwqk8o",
site_restricted_search=False,
request_limit="500/day",
):
"""
Build and return a view function that will query the
Expand Down Expand Up @@ -45,6 +50,13 @@ def search_view():
"""
Get search results from Google Custom Search
"""
# Rate limit requests to protect from spamming
# To adjust this rate visit
# https://limits.readthedocs.io/en/latest/quickstart.html#examples
limit = parse(request_limit)
rate_limit = fixed_window.hit(limit)
if not rate_limit:
return flask.abort(429, f"The rate limit is: {request_limit}")

# API key should always be provided as an environment variable
search_api_key = os.getenv("SEARCH_API_KEY")
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name="canonicalwebteam.search",
version="1.2.8",
version="1.3.0",
author="Canonical webteam",
author_email="webteam@canonical.com",
url="https://github.com/canonical/canonicalwebteam.search",
Expand All @@ -15,6 +15,6 @@
packages=find_packages(),
long_description=open("README.md").read(),
long_description_content_type="text/markdown",
install_requires=["Flask>=1.0.2", "user-agents>=2.0.0"],
tests_require=["httpretty", "Flask>=1.0.2", "user-agents>=2.0.0"],
install_requires=["Flask>=1.0.2", "user-agents>=2.0.0", "limits>=3.2.0"],
tests_require=["httpretty"],
)
22 changes: 22 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ def setUp(self):
),
)

# Rate limits
self.app.add_url_rule(
"/server/docs/limited/search",
"server-docs-search-limited",
build_search_view(
session=session,
template_path="docs/search.html",
site_restricted_search=True,
request_limit="0/second",
),
)

self.client = self.app.test_client()

def tearDown(self):
Expand Down Expand Up @@ -246,3 +258,13 @@ def test_site_restricted_search(self):
),
search_response.data,
)

def test_rate_limit(self):
    """
    Requests to the rate-limited search route are rejected with 429
    """
    limited_url = "/server/docs/limited/search?q=packer&start=20&num=3"
    response = self.client.get(limited_url)

    self.assertEqual(response.status_code, 429)

0 comments on commit d99b0ae

Please sign in to comment.