Skip to content

Commit

Permalink
improve http request handling for sources
Browse files Browse the repository at this point in the history
This change refactors HTTP repository source implementations. The
following changes have been made.

- CacheControl cache now lives within Authenticator.
- Authenticator manages unique sessions for individual netloc.
- CacheControl usage now respects disable cache parameter in repos.
- Certificate and authentication logic is now managed solely within
  Authenticator for source repositories taking advantage of recent
  enhancements.

These changes should allow for better handling of cases like those
described in python-poetry#3041. Additionally, this forms the foundation for
unifying HTTP specific logic within the code base and possibly allowing
for migration of requests etc. if/when required.
  • Loading branch information
abn committed May 5, 2022
1 parent 6397bae commit bd3aa7f
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 118 deletions.
4 changes: 0 additions & 4 deletions src/poetry/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,6 @@ def create_legacy_repository(
cls, source: dict[str, str], auth_config: Config
) -> LegacyRepository:
from poetry.repositories.legacy_repository import LegacyRepository
from poetry.utils.helpers import get_cert
from poetry.utils.helpers import get_client_cert

if "url" not in source:
raise RuntimeError("Unsupported source specified")
Expand All @@ -177,8 +175,6 @@ def create_legacy_repository(
name,
url,
config=auth_config,
cert=get_cert(auth_config, name),
client_cert=get_client_cert(auth_config, name),
)

@classmethod
Expand Down
4 changes: 1 addition & 3 deletions src/poetry/repositories/cached.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from abc import abstractmethod
from typing import TYPE_CHECKING

from cachecontrol.caches import FileCache
from cachy import CacheManager
from poetry.core.semver.helpers import parse_constraint

Expand All @@ -21,7 +20,7 @@
class CachedRepository(Repository, ABC):
CACHE_VERSION = parse_constraint("1.0.0")

def __init__(self, name: str, cache_group: str, disable_cache: bool = False):
def __init__(self, name: str, disable_cache: bool = False):
super().__init__(name)
self._disable_cache = disable_cache
self._cache_dir = REPOSITORY_CACHE_DIR / name
Expand All @@ -36,7 +35,6 @@ def __init__(self, name: str, cache_group: str, disable_cache: bool = False):
},
}
)
self._cache_control_cache = FileCache(str(self._cache_dir / cache_group))

@abstractmethod
def _get_release_info(self, name: str, version: str) -> dict:
Expand Down
61 changes: 17 additions & 44 deletions src/poetry/repositories/http.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,18 @@
from __future__ import annotations

import contextlib
import hashlib
import os
import urllib
import urllib.parse

from abc import ABC
from collections import defaultdict
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from urllib.parse import quote

import requests
import requests.auth

from cachecontrol import CacheControl
from poetry.core.packages.dependency import Dependency
from poetry.core.packages.utils.link import Link
from poetry.core.version.markers import parse_marker
Expand All @@ -42,64 +39,40 @@ def __init__(
url: str,
config: Config | None = None,
disable_cache: bool = False,
cert: Path | None = None,
client_cert: Path | None = None,
) -> None:
super().__init__(name, "_http", disable_cache)
super().__init__(name, disable_cache)
self._url = url
self._client_cert = client_cert
self._cert = cert

self._authenticator = Authenticator(
config=config or Config(use_environment=True)
)

self._session = CacheControl(
self._authenticator.session, cache=self._cache_control_cache
config=config or Config(use_environment=True),
cache_id=name,
disable_cache=disable_cache,
)

username, password = self._authenticator.get_credentials_for_url(self._url)
if username is not None and password is not None:
self._authenticator.session.auth = requests.auth.HTTPBasicAuth(
username, password
)

if self._cert:
self._authenticator.session.verify = str(self._cert)

if self._client_cert:
self._authenticator.session.cert = str(self._client_cert)

@property
def session(self) -> CacheControl:
return self._session

def __del__(self) -> None:
with contextlib.suppress(AttributeError):
self._session.close()
def session(self) -> Authenticator:
return self._authenticator

@property
def url(self) -> str:
return self._url

@property
def cert(self) -> Path | None:
return self._cert
cert = self._authenticator.get_certs_for_url(self.url).get("verify")
if cert:
return Path(cert)
return None

@property
def client_cert(self) -> Path | None:
return self._client_cert
cert = self._authenticator.get_certs_for_url(self.url).get("cert")
if cert:
return Path(cert)
return None

@property
def authenticated_url(self) -> str:
if not self._session.auth:
return self.url

parsed = urllib.parse.urlparse(self.url)
username = quote(self._session.auth.username, safe="")
password = quote(self._session.auth.password, safe="")

return f"{parsed.scheme}://{username}:{password}@{parsed.netloc}{parsed.path}"
return self._authenticator.authenticated_url(url=self.url)

def _download(self, url: str, dest: str) -> None:
return download_file(url, dest, session=self.session)
Expand Down Expand Up @@ -286,7 +259,7 @@ def _links_to_data(self, links: list[Link], data: PackageInfo) -> dict:
def _get_response(self, endpoint: str) -> requests.Response | None:
url = self._url + endpoint
try:
response = self.session.get(url)
response = self.session.get(url, raise_for_status=False)
if response.status_code in (401, 403):
self._log(
f"Authorization error accessing {url}",
Expand Down
8 changes: 1 addition & 7 deletions src/poetry/repositories/legacy_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@


if TYPE_CHECKING:
from pathlib import Path

from poetry.core.packages.dependency import Dependency
from poetry.core.packages.utils.link import Link

Expand All @@ -28,15 +26,11 @@ def __init__(
url: str,
config: Config | None = None,
disable_cache: bool = False,
cert: Path | None = None,
client_cert: Path | None = None,
) -> None:
if name == "pypi":
raise ValueError("The name [pypi] is reserved for repositories")

super().__init__(
name, url.rstrip("/"), config, disable_cache, cert, client_cert
)
super().__init__(name, url.rstrip("/"), config, disable_cache)

def find_packages(self, dependency: Dependency) -> list[Package]:
packages = []
Expand Down
2 changes: 1 addition & 1 deletion src/poetry/repositories/pypi_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def _get(self, endpoint: str) -> dict | None:
except requests.exceptions.TooManyRedirects:
# Cache control redirect loop.
# We try to remove the cache and try again
self._cache_control_cache.delete(self._base_url + endpoint)
self.session.delete_cache(self._base_url + endpoint)
json_response = self.session.get(self._base_url + endpoint)

if json_response.status_code == 404:
Expand Down
101 changes: 84 additions & 17 deletions src/poetry/utils/authenticator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import contextlib
import logging
import time
import urllib.parse
Expand All @@ -12,7 +13,11 @@
import requests.auth
import requests.exceptions

from cachecontrol import CacheControl
from cachecontrol.caches import FileCache

from poetry.exceptions import PoetryException
from poetry.locations import REPOSITORY_CACHE_DIR
from poetry.utils.helpers import get_cert
from poetry.utils.helpers import get_client_cert
from poetry.utils.password_manager import PasswordManager
Expand All @@ -26,43 +31,98 @@
from poetry.config.config import Config


logger = logging.getLogger()
logger = logging.getLogger(__name__)


class Authenticator:
def __init__(self, config: Config, io: IO | None = None) -> None:
def __init__(
self,
config: Config,
io: IO | None = None,
cache_id: str | None = None,
disable_cache: bool = False,
) -> None:
self._config = config
self._io = io
self._session: requests.Session | None = None
self._sessions_for_netloc: dict[str, requests.Session] = {}
self._credentials: dict[str, tuple[str, str]] = {}
self._certs: dict[str, dict[str, Path | None]] = {}
self._password_manager = PasswordManager(self._config)
self._cache_control = (
FileCache(
str(REPOSITORY_CACHE_DIR / (cache_id or "_default_cache") / "_http")
)
if not disable_cache
else None
)

def _log(self, message: str, level: str = "debug") -> None:
if self._io is not None:
self._io.write_line(f"<{level}>{message}</{level}>")
else:
getattr(logger, level, logger.debug)(message)
@property
def cache(self) -> FileCache | None:
return self._cache_control

@property
def session(self) -> requests.Session:
if self._session is None:
self._session = requests.Session()
def is_cached(self) -> bool:
return self._cache_control is not None

def create_session(self) -> requests.Session:
session = requests.Session()

if not self.is_cached:
return session

return CacheControl(sess=session, cache=self._cache_control)

def get_session(self, url: str | None = None) -> requests.Session:
if not url:
return self.create_session()

parsed_url = urllib.parse.urlsplit(url)
netloc = parsed_url.netloc

return self._session
if netloc not in self._sessions_for_netloc:
logger.debug("Creating new session for %s", netloc)
self._sessions_for_netloc[netloc] = self.create_session()

return self._sessions_for_netloc[netloc]

def close(self) -> None:
for session in [self._session, *self._sessions_for_netloc.values()]:
if session is not None:
with contextlib.suppress(AttributeError):
session.close()

def __del__(self) -> None:
if self._session is not None:
self._session.close()
self.close()

def delete_cache(self, url: str) -> None:
if self.is_cached:
self._cache_control.delete(key=url)

def request(self, method: str, url: str, **kwargs: Any) -> requests.Response:
def authenticated_url(self, url: str) -> str:
parsed = urllib.parse.urlparse(url)
username, password = self.get_credentials_for_url(url)

if username is not None and password is not None:
username = urllib.parse.quote(username, safe="")
password = urllib.parse.quote(password, safe="")

return (
f"{parsed.scheme}://{username}:{password}@{parsed.netloc}{parsed.path}"
)

return url

def request(
self, method: str, url: str, raise_for_status: bool = True, **kwargs: Any
) -> requests.Response:
request = requests.Request(method, url)
username, password = self.get_credentials_for_url(url)

if username is not None and password is not None:
request = requests.auth.HTTPBasicAuth(username, password)(request)

session = self.session
session = self.get_session(url=url)
prepared_request = session.prepare_request(request)

proxies = kwargs.get("proxies", {})
Expand Down Expand Up @@ -100,19 +160,26 @@ def request(self, method: str, url: str, **kwargs: Any) -> requests.Response:
raise e
else:
if resp.status_code not in [502, 503, 504] or is_last_attempt:
resp.raise_for_status()
if resp.status_code is not None and raise_for_status:
resp.raise_for_status()
return resp

if not is_last_attempt:
attempt += 1
delay = 0.5 * attempt
self._log(f"Retrying HTTP request in {delay} seconds.", level="debug")
logger.debug(f"Retrying HTTP request in {delay} seconds.")
time.sleep(delay)
continue

# this should never really be hit under any sane circumstance
raise PoetryException("Failed HTTP {} request", method.upper())

def get(self, url: str, **kwargs: Any) -> requests.Response:
return self.request("get", url, **kwargs)

def post(self, url: str, **kwargs: Any) -> requests.Response:
return self.request("post", url, **kwargs)

def get_credentials_for_url(self, url: str) -> tuple[str | None, str | None]:
parsed_url = urllib.parse.urlsplit(url)

Expand Down
3 changes: 2 additions & 1 deletion src/poetry/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from requests import Session

from poetry.config.config import Config
from poetry.utils.authenticator import Authenticator


_canonicalize_regex = re.compile("[-_]+")
Expand Down Expand Up @@ -94,7 +95,7 @@ def merge_dicts(d1: dict, d2: dict) -> None:
def download_file(
url: str,
dest: str,
session: Session | None = None,
session: Authenticator | Session | None = None,
chunk_size: int = 1024,
) -> None:
import requests
Expand Down
Loading

0 comments on commit bd3aa7f

Please sign in to comment.