From c1607298524bb8a63e690a46ecdc1fdb4dc4b3b5 Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Mon, 26 Jun 2023 19:22:32 +0000 Subject: [PATCH 1/6] Add a failing test --- tests/unit/forklift/test_legacy.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index b99a4113925b..0f88364b04c0 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -2310,12 +2310,21 @@ def test_upload_fails_with_diff_filename_same_blake2( "400 File already exists. See /the/help/url/ for more information." ) - def test_upload_fails_with_wrong_filename(self, pyramid_config, db_request): + @pytest.mark.parametrize( + "project_name", + [ + "something_else", # completely different + "no", # starts with same prefix + ], + ) + def test_upload_fails_with_wrong_filename( + self, pyramid_config, db_request, project_name + ): user = UserFactory.create() pyramid_config.testing_securitypolicy(identity=user) db_request.user = user EmailFactory.create(user=user) - project = ProjectFactory.create() + project = ProjectFactory.create(name=project_name) release = ReleaseFactory.create(project=project, version="1.0") RoleFactory.create(user=user, project=project) From 6bcacc1249b2454c437eba157334e9891eff895b Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Mon, 26 Jun 2023 19:32:36 +0000 Subject: [PATCH 2/6] Make the test fail all the way to DID NOT RAISE --- tests/unit/forklift/test_legacy.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 0f88364b04c0..43ea33309984 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -2318,17 +2318,25 @@ def test_upload_fails_with_diff_filename_same_blake2( ], ) def test_upload_fails_with_wrong_filename( - self, pyramid_config, db_request, project_name + self, pyramid_config, db_request, metrics, project_name ): user = UserFactory.create() pyramid_config.testing_securitypolicy(identity=user) db_request.user = user + db_request.user_agent = "warehouse-tests/6.6.6" EmailFactory.create(user=user) project = ProjectFactory.create(name=project_name) release = ReleaseFactory.create(project=project, version="1.0") RoleFactory.create(user=user, project=project) + storage_service = pretend.stub(store=lambda path, filepath, meta: None) + db_request.find_service = lambda svc, name=None, context=None: { + IFileStorage: storage_service, + IMetricsService: metrics, + }.get(svc) + filename = f"nope-{release.version}.tar.gz" + file_content = io.BytesIO(_TAR_GZ_PKG_TESTDATA) db_request.POST = MultiDict( { @@ -2336,14 +2344,15 @@ def test_upload_fails_with_wrong_filename( "name": project.name, "version": release.version, "filetype": "sdist", - "md5_digest": "nope!", + "md5_digest": _TAR_GZ_PKG_MD5, "content": pretend.stub( filename=filename, - file=io.BytesIO(b"a" * (legacy.MAX_FILESIZE + 1)), + file=file_content, type="application/tar", ), } ) + db_request.help_url = lambda **kw: "/the/help/url/" with pytest.raises(HTTPBadRequest) as excinfo: legacy.file_upload(db_request) From e6d540e6318fce21d1a1a47ad6b81a3dca2e16ef Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Mon, 26 Jun 2023 19:52:48 +0000 Subject: [PATCH 3/6] Refactor the test a bit --- tests/unit/forklift/test_legacy.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index 43ea33309984..c59ca4f7a35b 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -2311,14 +2311,14 @@ def test_upload_fails_with_diff_filename_same_blake2( ) @pytest.mark.parametrize( - "project_name", + "filename_prefix, project_name", [ - "something_else", # completely different - "no", # starts with same prefix + ("nope", "something_else"), # completely different + ("nope", "no"), # starts with same prefix ], ) def test_upload_fails_with_wrong_filename( - self, pyramid_config, db_request, metrics, project_name + self, pyramid_config, db_request, metrics, filename_prefix, project_name ): user = UserFactory.create() pyramid_config.testing_securitypolicy(identity=user) @@ -2335,7 +2335,7 @@ def test_upload_fails_with_wrong_filename( IMetricsService: metrics, }.get(svc) - filename = f"nope-{release.version}.tar.gz" + filename = filename_prefix + f"-{release.version}.tar.gz" file_content = io.BytesIO(_TAR_GZ_PKG_TESTDATA) db_request.POST = MultiDict( From a0179ee4f45673b5a44d7f66b5b893cdb3970460 Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Mon, 26 Jun 2023 19:53:00 +0000 Subject: [PATCH 4/6] Add another failing test case --- tests/unit/forklift/test_legacy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index c59ca4f7a35b..f79a3f14fc1c 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -2315,6 +2315,7 @@ def test_upload_fails_with_diff_filename_same_blake2( [ ("nope", "something_else"), # completely different ("nope", "no"), # starts with same prefix + ("no-way", "no"), # starts with same prefix with hyphen ], ) def test_upload_fails_with_wrong_filename( From 31b17ad94f64630e13e7b38062f56d6d7637eefd Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Mon, 26 Jun 2023 20:46:11 +0000 Subject: [PATCH 5/6] More test cases --- tests/unit/forklift/test_legacy.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/unit/forklift/test_legacy.py b/tests/unit/forklift/test_legacy.py index f79a3f14fc1c..0b9817abae91 100644 --- a/tests/unit/forklift/test_legacy.py +++ b/tests/unit/forklift/test_legacy.py @@ -2311,15 +2311,21 @@ def test_upload_fails_with_diff_filename_same_blake2( ) @pytest.mark.parametrize( - "filename_prefix, project_name", + "filename, project_name", [ - ("nope", "something_else"), # completely different - ("nope", "no"), # starts with same prefix - ("no-way", "no"), # starts with same prefix with hyphen + # completely different + ("nope-{version}.tar.gz", "something_else"), + ("nope-{version}-py3-none-any.whl", "something_else"), + # starts with same prefix + ("nope-{version}.tar.gz", "no"), + ("nope-{version}-py3-none-any.whl", "no"), + # starts with same prefix with hyphen + ("no-way-{version}.tar.gz", "no"), + ("no_way-{version}-py3-none-any.whl", "no"), ], ) def test_upload_fails_with_wrong_filename( - self, pyramid_config, db_request, metrics, filename_prefix, project_name + self, pyramid_config, db_request, metrics, filename, project_name ): user = UserFactory.create() pyramid_config.testing_securitypolicy(identity=user) @@ -2336,9 +2342,6 @@ def test_upload_fails_with_wrong_filename( IMetricsService: metrics, }.get(svc) - filename = filename_prefix + f"-{release.version}.tar.gz" - file_content = io.BytesIO(_TAR_GZ_PKG_TESTDATA) - db_request.POST = MultiDict( { "metadata_version": "1.2", @@ -2347,8 +2350,8 @@ def test_upload_fails_with_wrong_filename( "filetype": "sdist", "md5_digest": _TAR_GZ_PKG_MD5, "content": pretend.stub( - filename=filename, - file=file_content, + filename=filename.format(version=release.version), + file=io.BytesIO(_TAR_GZ_PKG_TESTDATA), type="application/tar", ), } From fda76780d0fae0002bcec7385baacdbc6fe3cdc9 Mon Sep 17 00:00:00 2001 From: Dustin Ingram Date: Mon, 26 Jun 2023 20:54:54 +0000 Subject: [PATCH 6/6] Ensure filenames only have the project name No more, no less. Previously this allowed filenames that started with the project name. --- warehouse/forklift/legacy.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py index f07515f055cd..3e830828c950 100644 --- a/warehouse/forklift/legacy.py +++ b/warehouse/forklift/legacy.py @@ -1208,10 +1208,20 @@ def file_upload(request): # Ensure the filename doesn't contain any characters that are too 🌶️spicy🥵 _validate_filename(filename) + # Extract the project name from the filename and normalize it. + filename_prefix = pkg_resources.safe_name( + # For wheels, the project name is normalized and won't contain hyphens, so + # we can split on the first hyphen. + filename.partition("-")[0] + if filename.endswith(".whl") + # For source releases, we know that the version should not contain any + # hypens, so we can split on the last hypen to get the project name. + else filename.rpartition("-")[0] + ).lower() + # Make sure that our filename matches the project that it is being uploaded # to. - prefix = pkg_resources.safe_name(project.name).lower() - if not pkg_resources.safe_name(filename).lower().startswith(prefix): + if (prefix := pkg_resources.safe_name(project.name).lower()) != filename_prefix: raise _exc_with_message( HTTPBadRequest, f"Start filename for {project.name!r} with {prefix!r}.",