Skip to content

Commit

Permalink
Replace aiofiles with Python threads (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
Archmonger authored Sep 12, 2024
1 parent a3f1225 commit 5f7c100
Show file tree
Hide file tree
Showing 13 changed files with 252 additions and 66 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,17 @@ Using the following categories, list your changes in this order:

### Added

- You can now utilize the Django manifest rather than scanning the filesystem when using `settings.py:SERVESTATIC_USE_MANIFEST`.
- Django `settings.py:SERVESTATIC_USE_MANIFEST` will allow ServeStatic to use the Django manifest rather than scanning the filesystem.
- When also using ServeStatic's `CompressedManifestStaticFilesStorage` backend, ServeStatic will no longer need to call `os.stat`.

### Changed

- Minimum python version is now 3.9.
- Django `settings.py:SERVESTATIC_USE_FINDERS` will now discover files strictly using the [finders API](https://docs.djangoproject.com/en/stable/ref/contrib/staticfiles/#finders-module). Previously, ServeStatic would also scan `settings.py:STATIC_ROOT` for files not found by the finders API.
- Async file reading is now done via threads rather than [`aiofiles`](https://github.com/Tinche/aiofiles) due to [recent performance tests](https://github.com/mosquito/aiofile/issues/88#issuecomment-2314380621).
- `BaseServeStatic` has been renamed to `ServeStaticBase`.
- `AsgiFileServer` has been renamed to `FileServerASGI`.
- Lots of internal refactoring to improve performance, code quality, and maintainability.

## [1.2.0](https://github.com/Archmonger/ServeStatic/compare/1.1.0...1.2.0) - 2024-08-30

Expand Down
2 changes: 1 addition & 1 deletion docs/src/django-settings.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
!!! Note

The `ServeStaticMiddleware` class takes all the same configuration options as the `ServeStatic` base class, but rather than accepting keyword arguments to its constructor it uses Django settings. The setting names are just the keyword arguments upper-cased with a `SERVESTATIC_` prefix.
The `ServeStaticMiddleware` class can take the same configuration options as the `ServeStatic` base class, but rather than accepting keyword arguments to its constructor it uses Django settings. The setting names are just the keyword arguments upper-cased with a `SERVESTATIC_` prefix.

---

Expand Down
2 changes: 0 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ project_urls =

[options]
packages = find:
install_requires =
aiofiles>=22.1.0
python_requires = >=3.9
include_package_data = True
package_dir =
Expand Down
12 changes: 5 additions & 7 deletions src/servestatic/asgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,11 @@

from asgiref.compatibility import guarantee_single_callable

from servestatic.base import BaseServeStatic
from servestatic.utils import decode_path_info
from servestatic.base import ServeStaticBase
from servestatic.utils import decode_path_info, get_block_size

# This is the same size as wsgiref.FileWrapper
BLOCK_SIZE = 8192


class ServeStaticASGI(BaseServeStatic):
class ServeStaticASGI(ServeStaticBase):
user_app = None

async def __call__(self, scope, receive, send):
Expand Down Expand Up @@ -42,6 +39,7 @@ class FileServerASGI:

def __init__(self, static_file):
self.static_file = static_file
self.block_size = get_block_size()

async def __call__(self, scope, receive, send):
# Convert ASGI headers into WSGI headers. Allows us to reuse all of our WSGI
Expand Down Expand Up @@ -75,7 +73,7 @@ async def __call__(self, scope, receive, send):
# Stream the file response body
async with response.file as async_file:
while True:
chunk = await async_file.read(BLOCK_SIZE)
chunk = await async_file.read(self.block_size)
more_body = bool(chunk)
await send(
{
Expand Down
2 changes: 1 addition & 1 deletion src/servestatic/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from servestatic.utils import ensure_leading_trailing_slash, scantree


class BaseServeStatic:
class ServeStaticBase:
# Ten years is what nginx sets a max age if you use 'expires max;'
# so we'll follow its lead
FOREVER = 10 * 365 * 24 * 60 * 60
Expand Down
36 changes: 25 additions & 11 deletions src/servestatic/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,35 @@
from urllib.request import url2pathname

import django
from aiofiles.base import AiofilesContextManager
from asgiref.sync import iscoroutinefunction, markcoroutinefunction
from django.conf import settings as django_settings
from django.contrib.staticfiles import finders
from django.contrib.staticfiles.storage import (
ManifestStaticFilesStorage,
staticfiles_storage,
)
from django.http import FileResponse
from django.http import FileResponse, HttpRequest

from servestatic.responders import MissingFileError
from servestatic.responders import (
AsyncSlicedFile,
MissingFileError,
SlicedFile,
StaticFile,
)
from servestatic.utils import (
AsyncFile,
AsyncFileIterator,
AsyncToSyncIterator,
EmptyAsyncIterator,
ensure_leading_trailing_slash,
stat_files,
)
from servestatic.wsgi import ServeStatic
from servestatic.wsgi import ServeStaticBase

__all__ = ["ServeStaticMiddleware"]


class ServeStaticMiddleware(ServeStatic):
class ServeStaticMiddleware(ServeStaticBase):
"""
Wrap ServeStatic to allow it to function as Django middleware, rather
than ASGI/WSGI middleware.
Expand Down Expand Up @@ -133,7 +138,7 @@ async def __call__(self, request):
return await self.get_response(request)

@staticmethod
async def aserve(static_file, request):
async def aserve(static_file: StaticFile, request: HttpRequest):
response = await static_file.aget_response(request.method, request.META)
status = int(response.status)
http_response = AsyncServeStaticFileResponse(
Expand Down Expand Up @@ -263,13 +268,22 @@ def set_headers(self, *args, **kwargs):
pass

def _set_streaming_content(self, value):
if isinstance(value, AiofilesContextManager):
# Django < 4.2 doesn't support async file responses, so we must perform
# some conversions to ensure compatibility.
if django.VERSION < (4, 2):
if isinstance(value, AsyncFile):
value = value.open_raw()
elif isinstance(value, EmptyAsyncIterator):
value = ()
elif isinstance(value, AsyncSlicedFile):
value = SlicedFile(value.fileobj.open_raw(), value.start, value.end)

# Django 4.2+ supports async file responses, but they need to be converted from
# a file-like object to an iterator, otherwise Django will assume the content is
# a traditional (sync) file object.
elif isinstance(value, (AsyncFile, AsyncSlicedFile)):
value = AsyncFileIterator(value)

# Django < 4.2 doesn't support async file responses, so we convert to sync
if django.VERSION < (4, 2) and hasattr(value, "__aiter__"):
value = AsyncToSyncIterator(value)

super()._set_streaming_content(value)

if django.VERSION >= (4, 2):
Expand Down
28 changes: 12 additions & 16 deletions src/servestatic/responders.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
from urllib.parse import quote
from wsgiref.headers import Headers

import aiofiles
from aiofiles.base import AiofilesContextManager
from aiofiles.threadpool.binary import AsyncBufferedIOBase
from servestatic.utils import AsyncFile


class Response:
Expand Down Expand Up @@ -55,6 +53,7 @@ def __init__(self, fileobj: BufferedIOBase, start: int, end: int):
self.fileobj = fileobj
self.seeked = False
self.start = start
self.end = end
self.remaining = end - start + 1

def read(self, size=-1):
Expand All @@ -74,22 +73,17 @@ def close(self):

class AsyncSlicedFile:
"""
Variant of `SlicedFile` that works as an async context manager for `aiofiles`.
This class does not need a `close` or `__await__` method, since we always open
async file handle via context managers (`async with`).
Variant of `SlicedFile` that works on async files.
"""

def __init__(self, context_manager: AiofilesContextManager, start: int, end: int):
self.fileobj: AsyncBufferedIOBase # This is populated during `__aenter__`
def __init__(self, fileobj: AsyncFile, start: int, end: int):
self.fileobj = fileobj
self.seeked = False
self.start = start
self.end = end
self.remaining = end - start + 1
self.context_manager = context_manager

async def read(self, size=-1):
if not self.fileobj: # pragma: no cover
raise RuntimeError("Async file objects need to be open via `async with`.")
if not self.seeked:
await self.fileobj.seek(self.start)
self.seeked = True
Expand All @@ -100,12 +94,14 @@ async def read(self, size=-1):
self.remaining -= len(data)
return data

async def close(self):
await self.fileobj.close()

async def __aenter__(self):
self.fileobj = await self.context_manager.__aenter__()
return self

async def __aexit__(self, exc_type, exc, tb):
return await self.context_manager.__aexit__(exc_type, exc, tb)
async def __aexit__(self, exc_type, exc_val, exc_tb):
await self.close()


class StaticFile:
Expand Down Expand Up @@ -143,7 +139,7 @@ async def aget_response(self, method, request_headers):
path, headers = self.get_path_and_headers(request_headers)
# We do not await this async file handle to allow us the option of opening
# it in a thread later
file_handle = aiofiles.open(path, "rb") if method != "HEAD" else None
file_handle = AsyncFile(path, "rb") if method != "HEAD" else None
range_header = request_headers.get("HTTP_RANGE")
if range_header:
# If we can't interpret the Range request for any reason then
Expand Down
113 changes: 103 additions & 10 deletions src/servestatic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,19 @@
import asyncio
import concurrent.futures
import contextlib
import functools
import os
from typing import AsyncIterable
import threading
from concurrent.futures import ThreadPoolExecutor
from io import IOBase
from typing import AsyncIterable, Callable

from aiofiles.base import AiofilesContextManager
# This is the same size as wsgiref.FileWrapper
ASGI_BLOCK_SIZE = 8192


def get_block_size() -> int:
    """Return the chunk size, in bytes, used when streaming file content.

    This is a thin accessor around the module-level ``ASGI_BLOCK_SIZE`` constant.
    """
    return ASGI_BLOCK_SIZE


# Follow Django in treating URLs as UTF-8 encoded (which requires undoing the
Expand Down Expand Up @@ -72,6 +81,92 @@ def __iter__(self):
thread_executor.shutdown(wait=False)


def open_lazy(f):
    """Decorator that ensures the file is open before calling a function.

    This can be turned into a @staticmethod on `AsyncFile` once we drop Python 3.9
    compatibility.

    The wrapped coroutine method raises `ValueError` if it is called after the
    file has been closed.
    """

    @functools.wraps(f)
    async def wrapper(self: "AsyncFile", *args, **kwargs):
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        if self.file_obj is None:
            opened = await self._execute(open, *self.open_args)
            if self.file_obj is None:
                self.file_obj = opened
            else:
                # Another coroutine finished opening the file while this one was
                # suspended. Keep the first handle and discard the duplicate so
                # that no file descriptor is leaked.
                await self._execute(opened.close)
        return await f(self, *args, **kwargs)

    return wrapper


class AsyncFile:
    """An async clone of the Python `open` function that utilizes threads for async file IO.

    This currently only covers the file operations needed by ServeStatic, but could be
    expanded in the future.

    The file is opened lazily, on the first call to a method decorated with
    `open_lazy`. Every blocking call is dispatched to a `ThreadPoolExecutor` with a
    single worker that is owned by this instance, so operations on one `AsyncFile`
    run one at a time, in the order they were submitted.
    """

    def __init__(
        self,
        file_path,
        mode: str = "r",
        buffering: int = -1,
        encoding: str | None = None,
        errors: str | None = None,
        newline: str | None = None,
        closefd: bool = True,
        opener: Callable[[str, int], int] | None = None,
    ):
        # Positional arguments forwarded verbatim to the builtin `open`.
        self.open_args = (
            file_path,
            mode,
            buffering,
            encoding,
            errors,
            newline,
            closefd,
            opener,
        )
        self.loop: asyncio.AbstractEventLoop | None = None
        self.executor = ThreadPoolExecutor(
            max_workers=1, thread_name_prefix="ServeStatic-AsyncFile"
        )
        # Retained so existing code that reads this attribute keeps working. It is
        # intentionally NOT held while awaiting: a `threading.Lock` held across an
        # `await` blocks the event loop thread as soon as a second coroutine tries
        # to acquire it, which deadlocks the loop.
        self.lock = threading.Lock()
        self.file_obj: None | IOBase = None
        self.closed = False

    async def _execute(self, func, *args):
        """Run a function in a dedicated thread, specific to this instance.

        Returns whatever `func(*args)` returns; exceptions raised by `func`
        propagate to the awaiting coroutine.
        """
        # Always bind to the loop that is running this coroutine. A future created
        # by `run_in_executor` can only be awaited from the loop it belongs to.
        self.loop = asyncio.get_running_loop()
        # The single-worker executor already serialises calls, so no lock is needed.
        return await self.loop.run_in_executor(self.executor, func, *args)

    def open_raw(self):
        """Open the file without using the executor.

        Shuts this instance's executor down first, then returns a regular
        (synchronous) file object opened with the stored `open` arguments.
        """
        self.executor.shutdown(wait=True)
        return open(*self.open_args)  # pylint: disable=unspecified-encoding

    async def close(self):
        """Mark this file as closed and close the underlying file if it was opened."""
        self.closed = True
        if self.file_obj is not None:
            await self._execute(self.file_obj.close)

    @open_lazy
    async def read(self, size=-1):
        """Read up to `size` bytes/characters (everything remaining if negative)."""
        return await self._execute(self.file_obj.read, size)

    @open_lazy
    async def seek(self, offset, whence=0):
        """Move the file position; returns the result of the underlying `seek`."""
        return await self._execute(self.file_obj.seek, offset, whence)

    @open_lazy
    async def __aenter__(self):
        # `open_lazy` has already opened the file by the time this body runs.
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    def __del__(self):
        self.executor.shutdown(wait=True)


class EmptyAsyncIterator:
"""Placeholder async iterator for responses that have no content."""

Expand All @@ -83,17 +178,15 @@ async def __anext__(self):


class AsyncFileIterator:
def __init__(self, file_context: AiofilesContextManager):
self.file_context = file_context
"""Async iterator that yields chunks of data from the provided async file."""

async def __aiter__(self):
"""Async iterator compatible with Django Middleware. Yields chunks of data from
the provided async file context manager."""
from servestatic.asgi import BLOCK_SIZE
def __init__(self, async_file: AsyncFile):
self.async_file = async_file

async with self.file_context as async_file:
async def __aiter__(self):
async with self.async_file as file:
while True:
chunk = await async_file.read(BLOCK_SIZE)
chunk = await file.read(get_block_size())
if not chunk:
break
yield chunk
4 changes: 2 additions & 2 deletions src/servestatic/wsgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

from wsgiref.util import FileWrapper

from servestatic.base import BaseServeStatic
from servestatic.base import ServeStaticBase
from servestatic.utils import decode_path_info


class ServeStatic(BaseServeStatic):
class ServeStatic(ServeStaticBase):
def __call__(self, environ, start_response):
path = decode_path_info(environ.get("PATH_INFO", ""))
if self.autorefresh:
Expand Down
Loading

0 comments on commit 5f7c100

Please sign in to comment.