Skip to content

Commit

Permalink
handle metadata email parsing errors
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicexplorer committed Sep 14, 2023
1 parent 5a34ca3 commit b09437f
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 30 deletions.
19 changes: 19 additions & 0 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,25 @@ def __str__(self) -> str:
)


class CacheMetadataError(PipError):
"""Raised when de/serializing a requirement into the metadata cache."""

def __init__(
self,
req: "InstallRequirement",
reason: str,
) -> None:
"""
:param req: The requirement we attempted to cache.
:param reason: Context about the precise error that occurred.
"""
self.req = req
self.reason = reason

def __str__(self) -> str:
return f"{self.reason} for {self.req} from {self.req.link}"


class UserInstallationInvalid(InstallationError):
"""A --user install is requested on an environment without user site."""

Expand Down
10 changes: 9 additions & 1 deletion src/pip/_internal/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@

from pip._internal.utils.misc import strtobool

from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel
from .base import (
BaseDistribution,
BaseEnvironment,
FilesystemWheel,
MemoryWheel,
Wheel,
serialize_metadata,
)

if TYPE_CHECKING:
from typing import Literal, Protocol
Expand All @@ -23,6 +30,7 @@
"get_environment",
"get_wheel_distribution",
"select_backend",
"serialize_metadata",
]


Expand Down
15 changes: 15 additions & 0 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import csv
import email.generator
import email.message
import email.policy
import functools
import io
import json
import logging
import pathlib
Expand Down Expand Up @@ -97,6 +100,18 @@ def _convert_installed_files_path(
return str(pathlib.Path(*info, *entry))


def serialize_metadata(msg: email.message.Message) -> str:
"""Write a dist's metadata to a string.
Calling ``str(dist.metadata)`` may raise an error by misinterpreting RST directives
as email headers. This method uses the more robust ``email.policy.EmailPolicy`` to
avoid those parsing errors."""
out = io.StringIO()
g = email.generator.Generator(out, policy=email.policy.EmailPolicy())
g.flatten(msg)
return out.getvalue()


class RequiresEntry(NamedTuple):
requirement: str
extra: str
Expand Down
42 changes: 13 additions & 29 deletions src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False

import email.errors
import gzip
import json
import mimetypes
Expand All @@ -20,6 +19,7 @@
from pip._internal.cache import LinkMetadataCache, should_cache
from pip._internal.distributions import make_distribution_for_install_requirement
from pip._internal.exceptions import (
CacheMetadataError,
DirectoryUrlHashUnsupported,
HashMismatch,
HashUnpinned,
Expand All @@ -32,6 +32,7 @@
from pip._internal.metadata import (
BaseDistribution,
get_metadata_distribution,
serialize_metadata,
)
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
Expand Down Expand Up @@ -230,7 +231,7 @@ class CacheableDist:
def from_dist(cls, link: Link, dist: BaseDistribution) -> "CacheableDist":
"""Extract the serializable data necessary to generate a metadata-only dist."""
return cls(
metadata=str(dist.metadata),
metadata=serialize_metadata(dist.metadata),
filename=Path(link.filename),
canonical_name=dist.canonical_name,
)
Expand All @@ -251,7 +252,7 @@ def to_json(self) -> Dict[str, Any]:
}

@classmethod
def from_json(cls, args: Dict[str, Any]) -> "CacheableDist":
def from_json(cls, args: Dict[str, str]) -> "CacheableDist":
return cls(
metadata=args["metadata"],
filename=Path(args["filename"]),
Expand Down Expand Up @@ -458,17 +459,10 @@ def _fetch_cached_metadata(
"found cached metadata for link %s at %s", req.link, f.name
)
args = json.load(f)
cached_dist = CacheableDist.from_json(args)
return cached_dist.to_dist()
except (OSError, json.JSONDecodeError, KeyError) as e:
logger.exception(
"error reading cached metadata for link %s at %s %s(%s)",
req.link,
cached_path,
e.__class__.__name__,
str(e),
)
raise
cached_dist = CacheableDist.from_json(args)
return cached_dist.to_dist()
except Exception:
raise CacheMetadataError(req, "error reading cached metadata")

def _cache_metadata(
self,
Expand All @@ -490,23 +484,13 @@ def _cache_metadata(
# containing directory for the cache file exists before writing.
os.makedirs(str(cached_path.parent), exist_ok=True)
try:
cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
args = cacheable_dist.to_json()
logger.debug("caching metadata for link %s at %s", req.link, cached_path)
with gzip.open(cached_path, mode="wt", encoding="utf-8") as f:
cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
args = cacheable_dist.to_json()
logger.debug("caching metadata for link %s at %s", req.link, f.name)
json.dump(args, f)
except (OSError, email.errors.HeaderParseError) as e:
# TODO: Some dists raise email.errors.HeaderParseError when calling str() or
# bytes() on the metadata, which is an email.Message. This is probably a bug
# in email parsing.
logger.exception(
"error caching metadata for dist %s from %s: %s(%s)",
metadata_dist,
req.link,
e.__class__.__name__,
str(e),
)
raise
except Exception:
raise CacheMetadataError(req, "failed to serialize metadata")

def _fetch_metadata_using_link_data_attr(
self,
Expand Down

0 comments on commit b09437f

Please sign in to comment.