Skip to content

Commit

Permalink
handle metadata email parsing errors
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicexplorer committed Sep 14, 2023
1 parent 5a34ca3 commit 227d8e8
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 32 deletions.
19 changes: 19 additions & 0 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,25 @@ def __str__(self) -> str:
)


class CacheMetadataError(PipError):
    """Raised when de/serializing a requirement into the metadata cache.

    The requirement and the explanation passed to the constructor are kept
    on the instance as ``req`` and ``reason``.
    """

    def __init__(self, req: "InstallRequirement", reason: str) -> None:
        """Remember which requirement failed and why.

        :param req: The requirement we attempted to cache.
        :param reason: Context about the precise error that occurred.
        """
        self.req = req
        self.reason = reason

    def __str__(self) -> str:
        """Format the error as ``<reason> for <req> from <req.link>``."""
        requirement = self.req
        return f"{self.reason} for {requirement} from {requirement.link}"


class UserInstallationInvalid(InstallationError):
"""A --user install is requested on an environment without user site."""

Expand Down
10 changes: 9 additions & 1 deletion src/pip/_internal/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@

from pip._internal.utils.misc import strtobool

from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel
from .base import (
BaseDistribution,
BaseEnvironment,
FilesystemWheel,
MemoryWheel,
Wheel,
serialize_metadata,
)

if TYPE_CHECKING:
from typing import Literal, Protocol
Expand All @@ -23,6 +30,7 @@
"get_environment",
"get_wheel_distribution",
"select_backend",
"serialize_metadata",
]


Expand Down
15 changes: 15 additions & 0 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import csv
import email.generator
import email.message
import email.policy
import functools
import io
import json
import logging
import pathlib
Expand Down Expand Up @@ -97,6 +100,18 @@ def _convert_installed_files_path(
return str(pathlib.Path(*info, *entry))


def serialize_metadata(msg: email.message.Message) -> str:
    """Render a dist's metadata message as a string.

    Calling ``str(dist.metadata)`` may raise an error by misinterpreting RST
    directives as email headers. Flattening the message through a generator
    configured with ``email.policy.EmailPolicy`` is more robust and avoids
    those parsing errors.

    :param msg: The metadata message to serialize.
    :returns: The flattened text of ``msg``.
    """
    policy = email.policy.EmailPolicy()
    with io.StringIO() as buffer:
        email.generator.Generator(buffer, policy=policy).flatten(msg)
        # Read the text before the ``with`` block closes the buffer.
        return buffer.getvalue()


class RequiresEntry(NamedTuple):
requirement: str
extra: str
Expand Down
46 changes: 15 additions & 31 deletions src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,22 @@
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False

import email.errors
import gzip
import json
import mimetypes
import os
import shutil
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
from typing import Dict, Iterable, List, Optional, Tuple

from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.requests.exceptions import InvalidSchema

from pip._internal.cache import LinkMetadataCache, should_cache
from pip._internal.distributions import make_distribution_for_install_requirement
from pip._internal.exceptions import (
CacheMetadataError,
DirectoryUrlHashUnsupported,
HashMismatch,
HashUnpinned,
Expand All @@ -32,6 +32,7 @@
from pip._internal.metadata import (
BaseDistribution,
get_metadata_distribution,
serialize_metadata,
)
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
Expand Down Expand Up @@ -230,7 +231,7 @@ class CacheableDist:
def from_dist(cls, link: Link, dist: BaseDistribution) -> "CacheableDist":
"""Extract the serializable data necessary to generate a metadata-only dist."""
return cls(
metadata=str(dist.metadata),
metadata=serialize_metadata(dist.metadata),
filename=Path(link.filename),
canonical_name=dist.canonical_name,
)
Expand All @@ -243,15 +244,15 @@ def to_dist(self) -> BaseDistribution:
canonical_name=self.canonical_name,
)

def to_json(self) -> Dict[str, Any]:
def to_json(self) -> Dict[str, str]:
return {
"metadata": self.metadata,
"filename": str(self.filename),
"canonical_name": self.canonical_name,
}

@classmethod
def from_json(cls, args: Dict[str, Any]) -> "CacheableDist":
def from_json(cls, args: Dict[str, str]) -> "CacheableDist":
return cls(
metadata=args["metadata"],
filename=Path(args["filename"]),
Expand Down Expand Up @@ -458,17 +459,10 @@ def _fetch_cached_metadata(
"found cached metadata for link %s at %s", req.link, f.name
)
args = json.load(f)
cached_dist = CacheableDist.from_json(args)
return cached_dist.to_dist()
except (OSError, json.JSONDecodeError, KeyError) as e:
logger.exception(
"error reading cached metadata for link %s at %s %s(%s)",
req.link,
cached_path,
e.__class__.__name__,
str(e),
)
raise
cached_dist = CacheableDist.from_json(args)
return cached_dist.to_dist()
except Exception:
raise CacheMetadataError(req, "error reading cached metadata")

def _cache_metadata(
self,
Expand All @@ -490,23 +484,13 @@ def _cache_metadata(
# containing directory for the cache file exists before writing.
os.makedirs(str(cached_path.parent), exist_ok=True)
try:
cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
args = cacheable_dist.to_json()
logger.debug("caching metadata for link %s at %s", req.link, cached_path)
with gzip.open(cached_path, mode="wt", encoding="utf-8") as f:
cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
args = cacheable_dist.to_json()
logger.debug("caching metadata for link %s at %s", req.link, f.name)
json.dump(args, f)
except (OSError, email.errors.HeaderParseError) as e:
# TODO: Some dists raise email.errors.HeaderParseError when calling str() or
# bytes() on the metadata, which is an email.Message. This is probably a bug
# in email parsing.
logger.exception(
"error caching metadata for dist %s from %s: %s(%s)",
metadata_dist,
req.link,
e.__class__.__name__,
str(e),
)
raise
except Exception:
raise CacheMetadataError(req, "failed to serialize metadata")

def _fetch_metadata_using_link_data_attr(
self,
Expand Down

0 comments on commit 227d8e8

Please sign in to comment.