diff --git a/readthedocs/constants.py b/readthedocs/constants.py index 579b937715a..8e289cf233f 100644 --- a/readthedocs/constants.py +++ b/readthedocs/constants.py @@ -1,15 +1,21 @@ -# -*- coding: utf-8 -*- - """Common constants.""" -from readthedocs.builds.version_slug import VERSION_SLUG_REGEX -from readthedocs.projects.constants import LANGUAGES_REGEX, PROJECT_SLUG_REGEX +import re +from readthedocs.builds.version_slug import VERSION_SLUG_REGEX +from readthedocs.projects.constants import ( + DOWNLOADABLE_MEDIA_TYPES, + LANGUAGES_REGEX, + PROJECT_SLUG_REGEX, +) pattern_opts = { - 'project_slug': PROJECT_SLUG_REGEX, - 'lang_slug': LANGUAGES_REGEX, - 'version_slug': VERSION_SLUG_REGEX, - 'filename_slug': '(?:.*)', - 'integer_pk': r'[\d]+', + "project_slug": PROJECT_SLUG_REGEX, + "lang_slug": LANGUAGES_REGEX, + "version_slug": VERSION_SLUG_REGEX, + "filename_slug": "(?:.*)", + "integer_pk": r"[\d]+", + "downloadable_type": "|".join( + re.escape(type_) for type_ in DOWNLOADABLE_MEDIA_TYPES + ), } diff --git a/readthedocs/projects/constants.py b/readthedocs/projects/constants.py index 08034c03395..4cae14dd265 100644 --- a/readthedocs/projects/constants.py +++ b/readthedocs/projects/constants.py @@ -34,6 +34,11 @@ MEDIA_TYPE_EPUB = 'epub' MEDIA_TYPE_HTMLZIP = 'htmlzip' MEDIA_TYPE_JSON = 'json' +DOWNLOADABLE_MEDIA_TYPES = ( + MEDIA_TYPE_PDF, + MEDIA_TYPE_EPUB, + MEDIA_TYPE_HTMLZIP, +) MEDIA_TYPES = ( MEDIA_TYPE_HTML, MEDIA_TYPE_PDF, diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index cedaddf035a..95edc910f7e 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -52,7 +52,13 @@ from readthedocs.storage import build_media_storage from readthedocs.vcs_support.backends import backend_cls -from .constants import MEDIA_TYPE_EPUB, MEDIA_TYPE_HTMLZIP, MEDIA_TYPE_PDF, MEDIA_TYPES +from .constants import ( + DOWNLOADABLE_MEDIA_TYPES, + MEDIA_TYPE_EPUB, + MEDIA_TYPE_HTMLZIP, + MEDIA_TYPE_PDF, + MEDIA_TYPES, +) log = structlog.get_logger(__name__) @@ -566,16 +572,22 @@ def get_storage_path( :return: the path to an item in storage (can be used with ``storage.url`` to get the URL) """ + if type_ not in MEDIA_TYPES: + raise ValueError("Invalid content type.") + + if include_file and type_ not in DOWNLOADABLE_MEDIA_TYPES: + raise ValueError("Invalid content type for downloadable file.") + type_dir = type_ # Add `external/` prefix for external versions if version_type == EXTERNAL: type_dir = f'{EXTERNAL}/{type_}' - folder_path = '{}/{}/{}'.format( - type_dir, - self.slug, - version_slug, - ) + # Version slug may come from an unstrusted input, + # so we use join to avoid any path traversal. + # All other values are already validated. + folder_path = build_media_storage.join(f"{type_dir}/{self.slug}", version_slug) + if include_file: extension = type_.replace('htmlzip', 'zip') return '{}/{}.{}'.format( diff --git a/readthedocs/projects/tests/test_views.py b/readthedocs/projects/tests/test_views.py index 999d312d748..3975dff5351 100644 --- a/readthedocs/projects/tests/test_views.py +++ b/readthedocs/projects/tests/test_views.py @@ -7,6 +7,7 @@ from readthedocs.integrations.models import Integration from readthedocs.invitations.models import Invitation from readthedocs.organizations.models import Organization +from readthedocs.projects.constants import DOWNLOADABLE_MEDIA_TYPES, PUBLIC from readthedocs.projects.models import Project @@ -239,3 +240,29 @@ def test_delete_last_maintainer(self): ) self.assertEqual(resp.status_code, 400) self.assertIn(self.user, self.project.users.all()) + + +@override_settings(RTD_ALLOW_ORGANIZATIONS=False) +class TestProjectDownloads(TestCase): + def setUp(self): + self.user = get(User) + self.project = get(Project, slug="project", users=[self.user]) + self.version = self.project.versions.first() + self.version.privacy_level = PUBLIC + self.version.save() + + def test_download_files(self): + for type_ in DOWNLOADABLE_MEDIA_TYPES: + resp = self.client.get( + reverse( + "project_download_media", + args=[self.project.slug, type_, self.version.slug], + ), + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(resp.status_code, 200) + extension = "zip" if type_ == MEDIA_TYPE_HTMLZIP else type_ + self.assertEqual( + resp["X-Accel-Redirect"], + f"/proxito/media/{type_}/project/latest/project.{extension}", + ) diff --git a/readthedocs/projects/urls/public.py b/readthedocs/projects/urls/public.py index e1ca9140c19..35bb4fc8a4e 100644 --- a/readthedocs/projects/urls/public.py +++ b/readthedocs/projects/urls/public.py @@ -33,28 +33,27 @@ re_path( r'^(?P{project_slug})/downloads/$'.format(**pattern_opts), public.project_downloads, - name='project_downloads', + name="project_downloads", ), - # NOTE: this URL is kept here only for backward compatibility to serve # non-html files from the dashboard. The ``name=`` is removed to avoid # generating an invalid URL by mistake (we should manually generate it # pointing to the right place: "docs.domain.org/_/downloads/") re_path( ( - r'^(?P{project_slug})/downloads/(?P[-\w]+)/' - r'(?P{version_slug})/$'.format(**pattern_opts) + r"^(?P{project_slug})/downloads/(?P{downloadable_type})/" + r"(?P{version_slug})/$".format(**pattern_opts) ), public.ProjectDownloadMedia.as_view(), + name="project_download_media", ), - re_path( - r'^(?P{project_slug})/badge/$'.format(**pattern_opts), + r"^(?P{project_slug})/badge/$".format(**pattern_opts), public.project_badge, - name='project_badge', + name="project_badge", ), re_path( - r'^(?P{project_slug})/search/$'.format(**pattern_opts), + r"^(?P{project_slug})/search/$".format(**pattern_opts), ProjectSearchView.as_view(), name='elastic_project_search', ), diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index 0c177b7f23b..ea350f384f0 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -17,6 +17,8 @@ from readthedocs.organizations.models import Organization from readthedocs.projects import constants from readthedocs.projects.constants import ( + DOWNLOADABLE_MEDIA_TYPES, + MEDIA_TYPE_HTMLZIP, MKDOCS, PRIVATE, PUBLIC, @@ -360,6 +362,34 @@ def test_project_nginx_serving_unicode_filename(self): '/proxito/media/html/project/latest/%C3%BA%C3%B1%C3%AD%C4%8D%C3%B3d%C3%A9.html', ) + @override_settings(PYTHON_MEDIA=False) + def test_download_files(self): + for type_ in DOWNLOADABLE_MEDIA_TYPES: + resp = self.client.get( + f"/_/downloads/en/latest/{type_}/", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(resp.status_code, 200) + extension = "zip" if type_ == MEDIA_TYPE_HTMLZIP else type_ + self.assertEqual( + resp["X-Accel-Redirect"], + f"/proxito/media/{type_}/project/latest/project.{extension}", + ) + + @override_settings(PYTHON_MEDIA=False) + def test_invalid_download_files(self): + """ + Making sure we don't serve HTML or other formats here. + + See GHSA-98pf-gfh3-x3mp for more information. + """ + for type_ in ["html", "foo", "zip"]: + resp = self.client.get( + f"/_/downloads/en/latest/{type_}/", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(resp.status_code, 404) + @mock.patch.object(ServeDocsMixin, '_is_audit_enabled') def test_track_html_files_only(self, is_audit_enabled): is_audit_enabled.return_value = False diff --git a/readthedocs/proxito/urls.py b/readthedocs/proxito/urls.py index 835d4deb7c7..43ac7119aed 100644 --- a/readthedocs/proxito/urls.py +++ b/readthedocs/proxito/urls.py @@ -65,12 +65,12 @@ # /_/downloads//// re_path( ( - r'^{DOC_PATH_PREFIX}downloads/' - r'(?P{lang_slug})/' - r'(?P{version_slug})/' - r'(?P[-\w]+)/$'.format( - DOC_PATH_PREFIX=DOC_PATH_PREFIX, - **pattern_opts) + r"^{DOC_PATH_PREFIX}downloads/" + r"(?P{lang_slug})/" + r"(?P{version_slug})/" + r"(?P{downloadable_type})/$".format( + DOC_PATH_PREFIX=DOC_PATH_PREFIX, **pattern_opts + ) ), ProjectDownloadMedia.as_view(same_domain_url=True), name='project_download_media', @@ -79,13 +79,13 @@ # /_/downloads///// re_path( ( - r'^{DOC_PATH_PREFIX}downloads/' - r'(?P{project_slug})/' - r'(?P{lang_slug})/' - r'(?P{version_slug})/' - r'(?P[-\w]+)/$'.format( - DOC_PATH_PREFIX=DOC_PATH_PREFIX, - **pattern_opts) + r"^{DOC_PATH_PREFIX}downloads/" + r"(?P{project_slug})/" + r"(?P{lang_slug})/" + r"(?P{version_slug})/" + r"(?P{downloadable_type})/$".format( + DOC_PATH_PREFIX=DOC_PATH_PREFIX, **pattern_opts + ) ), ProjectDownloadMedia.as_view(same_domain_url=True), name='project_download_media', diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index f44faaa6b80..d727772849a 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -340,8 +340,9 @@ def get(self, request, proxito_path, template_name='404.html'): .only("documentation_type") .first() ) - doc_type = version.documentation_type if version else None - versions = [(version_slug, doc_type)] + versions = [] + if version: + versions.append((version.slug, version.documentation_type)) default_version_slug = final_project.get_default_version() if default_version_slug != version_slug: default_version_doc_type = ( diff --git a/readthedocs/rtd_tests/tests/test_project.py b/readthedocs/rtd_tests/tests/test_project.py index 2903a82a191..adfb293322b 100644 --- a/readthedocs/rtd_tests/tests/test_project.py +++ b/readthedocs/rtd_tests/tests/test_project.py @@ -21,7 +21,15 @@ ) from readthedocs.builds.models import Build, Version from readthedocs.oauth.services import GitHubService, GitLabService -from readthedocs.projects.constants import GITHUB_BRAND, GITLAB_BRAND +from readthedocs.projects.constants import ( + GITHUB_BRAND, + GITLAB_BRAND, + MEDIA_TYPE_EPUB, + MEDIA_TYPE_HTML, + MEDIA_TYPE_HTMLZIP, + MEDIA_TYPE_PDF, + MEDIA_TYPES, +) from readthedocs.projects.exceptions import ProjectConfigurationError from readthedocs.projects.models import Project from readthedocs.projects.tasks.utils import finish_inactive_builds @@ -137,19 +145,39 @@ def test_multiple_conf_files(self, find_method): self.pip.conf_file() def test_get_storage_path(self): + for type_ in MEDIA_TYPES: + self.assertEqual( + self.pip.get_storage_path(type_, LATEST, include_file=False), + f"{type_}/pip/latest", + ) self.assertEqual( - self.pip.get_storage_path('pdf', LATEST), + self.pip.get_storage_path(MEDIA_TYPE_PDF, LATEST), 'pdf/pip/latest/pip.pdf', ) self.assertEqual( - self.pip.get_storage_path('epub', LATEST), + self.pip.get_storage_path(MEDIA_TYPE_EPUB, LATEST), 'epub/pip/latest/pip.epub', ) self.assertEqual( - self.pip.get_storage_path('htmlzip', LATEST), + self.pip.get_storage_path(MEDIA_TYPE_HTMLZIP, LATEST), 'htmlzip/pip/latest/pip.zip', ) + def test_get_storage_path_invalid_inputs(self): + # Invalid type. + with pytest.raises(ValueError): + self.pip.get_storage_path("foo") + + # Trying to get a file from a non-downloadable type. + with pytest.raises(ValueError): + self.pip.get_storage_path(MEDIA_TYPE_HTML, include_file=True) + + # Trying path traversal. + with pytest.raises(ValueError): + self.pip.get_storage_path( + MEDIA_TYPE_HTML, version_slug="../sneaky/index.html", include_file=False + ) + def test_get_storage_path_for_external_versions(self): self.assertEqual( self.pip.get_storage_path(