Skip to content

Commit

Permalink
Drop the doctype check (#10906)
Browse files Browse the repository at this point in the history
Co-authored-by: Pradyun Gedam <pradyunsg@gmail.com>
  • Loading branch information
q0w and pradyunsg authored Feb 27, 2022
1 parent 82aebdb commit a4f7508
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 88 deletions.
1 change: 1 addition & 0 deletions news/10903.removal.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Drop the doctype check, which presented a warning for index pages that use non-compliant HTML 5.
24 changes: 0 additions & 24 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,30 +181,6 @@ class UninstallationError(PipError):
"""General exception during uninstallation"""


class BadHTMLDoctypeDeclaration(DiagnosticPipError):
    """Warning shown when an index page lacks a proper HTML doctype."""

    reference = "bad-index-doctype"

    def __init__(self, *, url: str) -> None:
        """Build the diagnostic for the index page located at ``url``."""
        summary = (
            "The package index page being used does not have a proper HTML "
            "doctype declaration."
        )
        advice = (
            "You might need to reach out to the owner of that package index, "
            "to get this fixed. "
            "See https://github.com/pypa/pip/issues/10825 for context."
        )
        super().__init__(
            kind="warning",
            message=summary,
            # The URL goes through escape() before being embedded in the text.
            context=f"Problematic URL: {escape(url)}",
            note_stmt="This is an issue with the page at the URL mentioned above.",
            hint_stmt=advice,
        )


class MissingHTMLDoctypeDeclaration(BadHTMLDoctypeDeclaration):
    """Same diagnostic as its parent class, under a separate reference slug."""

    reference = "missing-index-doctype"


class MissingPyProjectBuildRequires(DiagnosticPipError):
"""Raised when pyproject.toml has `build-system`, but no `build-system.requires`."""

Expand Down
27 changes: 1 addition & 26 deletions src/pip/_internal/index/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,7 @@
from pip._vendor.requests import Response
from pip._vendor.requests.exceptions import RetryError, SSLError

from pip._internal.exceptions import (
BadHTMLDoctypeDeclaration,
MissingHTMLDoctypeDeclaration,
NetworkConnectionError,
)
from pip._internal.exceptions import NetworkConnectionError
from pip._internal.models.link import Link
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
Expand Down Expand Up @@ -401,33 +397,12 @@ class HTMLLinkParser(HTMLParser):

def __init__(self, url: str) -> None:
    """Start with empty parsing state for the page located at ``url``."""
    super().__init__(convert_charrefs=True)

    # Results gathered while feeding the page.
    self.anchors: List[Dict[str, Optional[str]]] = []
    self.base_url: Optional[str] = None
    self.url: str = url

    # Tracks whether doctype handling has already happened for this page.
    self._dealt_with_doctype_issues = False

def handle_decl(self, decl: str) -> None:
    """Log a warning when ``decl`` is not an accepted HTML doctype.

    Accepted forms are ``doctype html``, optionally followed by
    ``SYSTEM "about:legacy-compat"`` (single or double quotes), matched
    case-insensitively.
    """
    self._dealt_with_doctype_issues = True

    doctype_match = re.match(
        r"""doctype\s+html\s*(?:SYSTEM\s+(["'])about:legacy-compat\1)?\s*$""",
        decl,
        re.IGNORECASE,
    )
    if doctype_match is not None:
        # Declaration is acceptable; nothing to report.
        return

    logger.warning(
        "[present-diagnostic] %s",
        BadHTMLDoctypeDeclaration(url=self.url),
    )

def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
if not self._dealt_with_doctype_issues:
logger.warning(
"[present-diagnostic] %s",
MissingHTMLDoctypeDeclaration(url=self.url),
)
self._dealt_with_doctype_issues = True

if tag == "base" and self.base_url is None:
href = self.get_href(attrs)
if href is not None:
Expand Down
16 changes: 16 additions & 0 deletions tests/functional/test_install_index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import shutil
import textwrap
import urllib.parse

Expand All @@ -24,6 +25,21 @@ def test_find_links_relative_path(script: PipTestEnvironment, data: TestData) ->
result.did_create(initools_folder)


def test_find_links_no_doctype(script: PipTestEnvironment, data: TestData) -> None:
    """Install from a doctype-less find-links page without stderr output."""
    scratch = script.scratch_path
    shutil.copy(data.packages / "simple-1.0.tar.gz", scratch)

    # The index page is a bare anchor with no doctype declaration before it.
    index_page = scratch.joinpath("index.html")
    index_page.write_text('<a href="simple-1.0.tar.gz"></a>')

    pip_args = ["install", "simple==1.0", "--no-index", "--find-links", scratch]
    result = script.pip(*pip_args, expect_stderr=True)
    assert not result.stderr


@pytest.mark.usefixtures("with_wheel")
def test_find_links_requirements_file_relative_path(
script: PipTestEnvironment, data: TestData
Expand Down
38 changes: 0 additions & 38 deletions tests/unit/test_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,44 +551,6 @@ def test_parse_link_handles_deprecated_usage_properly() -> None:
assert "pkg1-2.0" in parsed_links[1].url


def test_parse_links_presents_warning_on_missing_doctype(
    caplog: pytest.LogCaptureFixture,
) -> None:
    """A page with no doctype still yields both links and logs one record."""
    page = HTMLPage(
        b'<a href="/pkg1-1.0.tar.gz"></a><a href="/pkg1-2.0.tar.gz"></a>',
        encoding=None,
        url="https://example.com/simple/",
        cache_link_parsing=False,
    )

    with caplog.at_level(logging.WARN):
        parsed_links = list(parse_links(page, use_deprecated_html5lib=False))

    assert len(parsed_links) == 2, parsed_links
    first_link, second_link = parsed_links
    assert "pkg1-1.0" in first_link.url
    assert "pkg1-2.0" in second_link.url

    assert len(caplog.records) == 1


def test_parse_links_presents_warning_on_html4_doctype(
    caplog: pytest.LogCaptureFixture,
) -> None:
    """A page with an XHTML 1.1 doctype yields both links and logs one record."""
    doctype = (
        b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
        b'"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">'
    )
    anchors = b'<a href="/pkg1-1.0.tar.gz"></a><a href="/pkg1-2.0.tar.gz"></a>'
    page = HTMLPage(
        doctype + anchors,
        encoding=None,
        url="https://example.com/simple/",
        cache_link_parsing=False,
    )

    with caplog.at_level(logging.WARN):
        parsed_links = list(parse_links(page, use_deprecated_html5lib=False))

    assert len(parsed_links) == 2, parsed_links
    first_link, second_link = parsed_links
    assert "pkg1-1.0" in first_link.url
    assert "pkg1-2.0" in second_link.url

    assert len(caplog.records) == 1


@mock.patch("pip._internal.index.collector.raise_for_status")
def test_request_http_error(
mock_raise_for_status: mock.Mock, caplog: pytest.LogCaptureFixture
Expand Down

0 comments on commit a4f7508

Please sign in to comment.