Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support JSON API for proxpi server responses #8

Merged
merged 23 commits into from
Aug 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.6', '3.7', '3.8', '3.9', '3.10', '3.11-dev']
python-version: [3.6, 3.7, 3.8, 3.9, '3.10']
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
Expand Down
54 changes: 54 additions & 0 deletions scripts/benchmark-json-api-response-size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Benchmark JSON API response size again HTML API.

Note: ``requests`` by default asks for gzip/zlib compression.
"""

import re
import sys
import json

import requests

# Runs of characters outside lower-case letters, digits and '-' are replaced
# with a single '-' when building project URLs
normalise_pattern = re.compile(r"[^a-z\d-]+")


def normalise_name(package_name):
    """Return the lower-cased project name with separator runs replaced by '-'."""
    return normalise_pattern.sub("-", package_name.lower())


def compressed_length(headers):
    """Return the response body size in bytes from its headers.

    Args:
        headers: mapping of response header names to values

    Returns:
        ``Content-Length`` as an integer (header values are strings)

    Raises:
        AssertionError: if the response was not gzip/deflate encoded, in
            which case the sizes being compared would not be compressed sizes
    """
    assert headers.get("Content-Encoding") in ("gzip", "deflate")
    return int(headers["Content-Length"])


def format_row(package_name, html_length, json_length):
    """Return one Markdown table row for a project.

    Args:
        package_name: project name to show in the first column
        html_length: HTML response size in bytes
        json_length: JSON response size in bytes
    """
    html_kb = round(html_length / 1024, 1)
    json_kb = round(json_length / 1024, 1)
    ratio = round(json_length / html_length, 2)
    return f"| {package_name} | {html_kb} | {json_kb} | {ratio} |"


def ratio_statistics(ratios):
    """Return the mean and population standard deviation of ``ratios``.

    Args:
        ratios: non-empty sequence of numbers
    """
    mean_ratio = sum(ratios) / len(ratios)
    variance = sum((r - mean_ratio) ** 2.0 for r in ratios) / len(ratios)
    return mean_ratio, variance ** 0.5


def main():
    """Read a JSON project list from stdin and print a size comparison table.

    Each project is requested twice from the local server: once as HTML and
    once as JSON. Projects whose HTML request fails are reported on stderr
    and skipped.
    """
    packages_json = sys.stdin.read()
    # Normalise before de-duplicating so each project is only fetched once
    package_names = sorted(
        {normalise_name(p["name"]) for p in json.loads(packages_json)}
    )

    print("| Project | HTML size (kB) | JSON size (kB) | JSON size ratio |")
    print("| ------- | -------------- | -------------- | --------------- |")
    ratios = []
    for package_name in package_names:
        url = f"http://localhost:5042/index/{package_name}/"

        response = requests.get(url, headers={"Accept": "text/html"})
        if not response.ok:
            print(
                f"Failed '{package_name}': "
                f"[{response.status_code}] {response.reason}",
                file=sys.stderr,
            )
            continue
        html_length = compressed_length(response.headers)

        response = requests.get(
            url, headers={"Accept": "application/vnd.pypi.simple.latest+json"}
        )
        # Surface HTTP errors before inspecting headers of a failed response
        response.raise_for_status()
        json_length = compressed_length(response.headers)

        ratios.append(json_length / html_length)
        print(format_row(package_name, html_length, json_length))

    if not ratios:
        print("No projects were benchmarked", file=sys.stderr)
        return
    mean_ratio, ratio_stddev = ratio_statistics(ratios)
    print(f"\nAverage ratio: {mean_ratio} ± {ratio_stddev}")


if __name__ == "__main__":
    main()
101 changes: 97 additions & 4 deletions src/proxpi/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import flask
import jinja2
import werkzeug.exceptions

from . import _cache

Expand Down Expand Up @@ -57,6 +58,63 @@ def _set_handler(self, *_, **__):
if handler.level > logging.DEBUG:
handler.level = logging.DEBUG
logger.info("Cache: %r", cache)
# JSON content-type version for which `_wants_json` also accepts a request
# for the 'latest' version
KNOWN_LATEST_JSON_VERSION = "v1"
# File attribute names (without their 'data-' prefix) that are copied into
# the JSON description of a file when present
KNOWN_DATASET_KEYS = ["requires-python", "dist-info-metadata", "gpg-sig", "yanked"]


def _wants_json(version: str = "v1") -> bool:
    """Determine if client wants a JSON response.

    First checks `format` request query parameter, and if its value is a
    known content-type, decides if client wants JSON. Then falls back to
    HTTP content-negotiation, where the decision is based on the quality
    of the JSON content-type (JSON must be equally or more preferred to
    HTML, but strictly more preferred to 'text/html').

    When ``version`` is the latest known version, a request for the
    'latest' JSON content-type is also treated as wanting JSON.

    Args:
        version: PyPI JSON response content-type version

    Returns:
        Whether a JSON response should be served

    Raises:
        werkzeug.exceptions.NotAcceptable: (via ``flask.abort(406)``) if
            the `format` parameter did not decide and neither the JSON nor
            any HTML content-type has a non-zero quality
    """

    if version == KNOWN_LATEST_JSON_VERSION:
        try:
            wants_latest = _wants_json("latest")
        except werkzeug.exceptions.NotAcceptable:
            # 'latest' is not acceptable: still try the concrete version
            pass
        else:
            if wants_latest:
                return True

    json_key = f"application/vnd.pypi.simple.{version}+json"
    html_keys = {
        "text/html",
        "application/vnd.pypi.simple.v1+html",
        "application/vnd.pypi.simple.latest+html",
    }

    # An unknown (or empty) format value falls through to negotiation
    requested_format = flask.request.args.get("format")
    if requested_format:
        if requested_format == json_key:
            return True
        if requested_format in html_keys:
            return False

    accepted = flask.request.accept_mimetypes
    json_quality = accepted.quality(json_key)
    html_quality = max(accepted.quality(k) for k in html_keys)
    iana_html_quality = accepted.quality("text/html")

    if not json_quality and not html_quality:
        flask.abort(406)
    # Coerce to bool: a bare `and` chain would return the zero quality itself
    return bool(
        json_quality
        and json_quality >= html_quality
        and json_quality > iana_html_quality
    )


def _build_json_response(data: dict, version: str = "v1") -> flask.Response:
    """Serialise ``data`` as a PyPI simple-API JSON response.

    Args:
        data: JSON-serialisable response body
        version: PyPI JSON response content-type version

    Returns:
        Response built with ``flask.jsonify`` whose mimetype is the
        versioned PyPI JSON content-type
    """

    mimetype = f"application/vnd.pypi.simple.{version}+json"
    json_response = flask.jsonify(data)
    json_response.mimetype = mimetype
    return json_response


BINARY_FILE_MIME_TYPE = (
os.environ.get("PROXPI_BINARY_FILE_MIME_TYPE", "")
Expand Down Expand Up @@ -94,8 +152,17 @@ def index():
def list_packages():
    """List all projects in index(es).

    Serves the JSON project list when `_wants_json` says the client wants
    JSON, otherwise the rendered 'packages.html' template. The response is
    marked as varying on the 'Accept' request header and is returned via
    `_compress`.
    """
    package_names = cache.list_projects()
    if _wants_json():
        response = _build_json_response(data={
            "meta": {"api-version": "1.0"},
            "projects": [{"name": n} for n in package_names],
        })  # fmt: skip
    else:
        response = flask.make_response(
            flask.render_template("packages.html", package_names=package_names),
        )
    # Add to (rather than overwrite) the Vary header: the representation
    # depends on the request's Accept header
    response.vary.add("Accept")
    return _compress(response)


@app.route("/index/<package_name>/")
Expand All @@ -106,8 +173,34 @@ def list_files(package_name: str):
except _cache.NotFound:
flask.abort(404)
raise
text = flask.render_template("files.html", package_name=package_name, files=files)
return _compress(text)

if _wants_json():
files_data = []
for file in files:
file_data = {"filename": file.name, "url": file.name, "hashes": {}}
for part in file.fragment.split(","):
try:
hash_name, hash_value = part.split("=")
except ValueError:
continue
file_data["hashes"][hash_name] = hash_value
for data_set_key in KNOWN_DATASET_KEYS:
if f"data-{data_set_key}" in file.attributes:
file_data[data_set_key] = file.attributes[f"data-{data_set_key}"]
files_data.append(file_data)
response = _build_json_response(data={
"meta": {"api-version": "1.0"},
"name": package_name,
"files": files_data,
}) # fmt: skip

else:
response = flask.make_response(
flask.render_template("files.html", package_name=package_name, files=files),
)

response.vary = (", " if response.vary else "") + "Accept"
return _compress(response)


@app.route("/index/<package_name>/<file_name>")
Expand Down
69 changes: 65 additions & 4 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,14 @@ def server(mock_root_index, mock_extra_index):
yield from _utils.make_server(proxpi_server.app)


def test_list(server):
@pytest.mark.parametrize("accept", ["text/html", "application/vnd.pypi.simple.v1+html"])
def test_list(server, accept):
"""Test getting package list."""
response = requests.get(f"{server}/index/")
response = requests.get(f"{server}/index/", headers={"Accept": accept})
response.raise_for_status()

assert response.headers["Content-Type"][:9] == "text/html"
assert "Accept" in response.headers["Vary"]
assert any(
response.headers["Content-Encoding"] == a
for a in ["gzip", "deflate"]
Expand All @@ -201,13 +204,34 @@ def test_list(server):
assert href == f"{text}/"


@pytest.mark.parametrize(
    "accept",
    [
        "application/vnd.pypi.simple.v1+json",
        "application/vnd.pypi.simple.latest+json",
    ],
)
def test_list_json(server, accept):
    """Test the package list is served as v1 JSON for JSON accept-types."""
    url = f"{server}/index/"
    response = requests.get(url, headers={"Accept": accept})
    assert response.status_code == 200

    # Only compare the media-type prefix; parameters may follow it
    content_type = response.headers["Content-Type"]
    assert content_type[:35] == "application/vnd.pypi.simple.v1+json"
    assert "Accept" in response.headers["Vary"]

    payload = response.json()
    assert payload["meta"] == {"api-version": "1.0"}
    assert {"name": "proxpi"} in payload["projects"]


@pytest.mark.parametrize("project", ["proxpi", "numpy", "scipy"])
def test_package(server, project):
@pytest.mark.parametrize("accept", [
"text/html", "application/vnd.pypi.simple.v1+html", "*/*"
])
def test_package(server, project, accept):
"""Test getting package files."""
project_url = f"{server}/index/{project}/"
response = requests.get(project_url)
response = requests.get(project_url, headers={"Accept": accept})
response.raise_for_status()

assert response.headers["Content-Type"][:9] == "text/html"
assert "Accept" in response.headers["Vary"]
assert any(
response.headers["Content-Encoding"] == a
for a in ["gzip", "deflate"]
Expand Down Expand Up @@ -256,6 +280,43 @@ def test_package(server, project):
assert specifier.filter(["1.2", "2.7", "3.3", "3.7", "3.10", "3.12"])


@pytest.mark.parametrize(
    "accept",
    [
        "application/vnd.pypi.simple.v1+json",
        "application/vnd.pypi.simple.latest+json",
    ],
)
@pytest.mark.parametrize("query_format", [False, True])
def test_package_json(server, accept, query_format):
    """Test package files are served as v1 JSON.

    The content-type is requested either through the `format` query
    parameter or through the 'Accept' header.
    """
    if query_format:
        params, headers = {"format": accept}, None
    else:
        params, headers = None, {"Accept": accept}
    url = f"{server}/index/proxpi/"
    response = requests.get(url, params=params, headers=headers)

    assert response.status_code == 200
    # Only compare the media-type prefix; parameters may follow it
    content_type = response.headers["Content-Type"]
    assert content_type[:35] == "application/vnd.pypi.simple.v1+json"
    assert "Accept" in response.headers["Vary"]

    payload = response.json()
    assert payload["meta"] == {"api-version": "1.0"}
    assert payload["name"] == "proxpi"
    files = payload["files"]
    assert all(f["url"] and f["filename"] == f["url"] for f in files)
    assert all("hashes" in f for f in files)


def test_package_unknown_accept(server):
    """Test an unknown accept-type for package files yields HTTP 406."""
    unknown_accept = "application/vnd.pypi.simple.v42+xml"
    url = f"{server}/index/proxpi/"
    response = requests.get(url, headers={"Accept": unknown_accept})
    assert response.status_code == 406


def test_invalidate_list(server):
"""Test invalidating package list cache."""
response = requests.delete(f"{server}/cache/list")
Expand Down