Skip to content

Commit 8340918

Browse files
author
Jussi Kukkonen
committed
ngclient: allow compression in HTTP responses
This commit tries to deal with two interests: * metadata is highly repetitive and compressible: allowing compression would be good * there may be broken web servers (see https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L842) that have problems with compression on already compressed target files We can make things better for that first interest while we have no real data for the second interest -- our current workarounds to avoid compression are based on hearsay, not testing. Now that individual fetchers are possible I suggest we simplify ngclient and allow compression. As an example the pip Fetcher could still use the pip response chunking code with all their workarounds -- pip certainly has better capability to maintain a mountain of workarounds and also has endless amounts of real-world testing compared to python-tuf. Details: * Stop modifying Accept-Encoding (Requests default includes gzip) * Don't use response.raw in RequestsFetcher as there is no need anymore * Fix issue in test_session_get_timeout(): it's not mocking the error that requests really raises in this case Fixes #1251 Signed-off-by: Jussi Kukkonen <jkukkonen@vmware.com>
1 parent c6b70cf commit 8340918

File tree

2 files changed

+16
-43
lines changed

2 files changed

+16
-43
lines changed

tests/test_fetcher_ng.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
from unittest.mock import Mock, patch
1818

1919
import requests
20-
import urllib3.exceptions
2120

2221
from tests import utils
2322
from tuf import unittest_toolbox
@@ -125,20 +124,22 @@ def test_http_error(self) -> None:
125124
def test_response_read_timeout(self, mock_session_get: Any) -> None:
126125
mock_response = Mock()
127126
attr = {
128-
"raw.read.side_effect": urllib3.exceptions.ReadTimeoutError(
129-
urllib3.HTTPConnectionPool("localhost"), "", "Read timed out."
127+
"iter_content.side_effect": requests.exceptions.ConnectionError(
128+
"Simulated timeout"
130129
)
131130
}
132131
mock_response.configure_mock(**attr)
133132
mock_session_get.return_value = mock_response
134133

135134
with self.assertRaises(exceptions.SlowRetrievalError):
136135
next(self.fetcher.fetch(self.url))
137-
mock_response.raw.read.assert_called_once()
136+
mock_response.iter_content.assert_called_once()
138137

139138
# Read/connect session timeout error
140139
@patch.object(
141-
requests.Session, "get", side_effect=urllib3.exceptions.TimeoutError
140+
requests.Session,
141+
"get",
142+
side_effect=requests.exceptions.Timeout("Simulated timeout"),
142143
)
143144
def test_session_get_timeout(self, mock_session_get: Any) -> None:
144145
with self.assertRaises(exceptions.SlowRetrievalError):

tuf/ngclient/_internal/requests_fetcher.py

Lines changed: 10 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
# Imports
1313
import requests
14-
import urllib3.exceptions
14+
import requests.exceptions
1515

1616
import tuf
1717
from tuf.api import exceptions
@@ -80,7 +80,7 @@ def fetch(self, url: str) -> Iterator[bytes]:
8080
response = session.get(
8181
url, stream=True, timeout=self.socket_timeout
8282
)
83-
except urllib3.exceptions.TimeoutError as e:
83+
except requests.exceptions.Timeout as e:
8484
raise exceptions.SlowRetrievalError from e
8585

8686
# Check response status.
@@ -99,26 +99,12 @@ def _chunks(self, response: "requests.Response") -> Iterator[bytes]:
9999
download."""
100100

101101
try:
102-
while True:
103-
# We download a fixed chunk of data in every round. This is
104-
# so that we can defend against slow retrieval attacks.
105-
# Furthermore, we do not wish to download an extremely
106-
# large file in one shot.
107-
108-
# NOTE: This may not handle some servers adding a
109-
# Content-Encoding header, which may cause urllib3 to
110-
# misbehave:
111-
# https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582
112-
data = response.raw.read(self.chunk_size)
113-
114-
# We might have no more data to read, we signal
115-
# that the download is complete.
116-
if not data:
117-
break
118-
102+
for data in response.iter_content(self.chunk_size):
119103
yield data
120-
121-
except urllib3.exceptions.ReadTimeoutError as e:
104+
except (
105+
requests.exceptions.ConnectionError,
106+
requests.exceptions.Timeout,
107+
) as e:
122108
raise exceptions.SlowRetrievalError from e
123109

124110
finally:
@@ -138,31 +124,17 @@ def _get_session(self, url: str) -> requests.Session:
138124
if not parsed_url.scheme or not parsed_url.hostname:
139125
raise exceptions.DownloadError("Failed to parse URL {url}")
140126

141-
session_index = parsed_url.scheme + "+" + parsed_url.hostname
127+
session_index = f"{parsed_url.scheme}+{parsed_url.hostname}"
142128
session = self._sessions.get(session_index)
143129

144130
if not session:
145131
session = requests.Session()
146132
self._sessions[session_index] = session
147133

148-
# Attach some default headers to every Session.
149-
requests_user_agent = session.headers["User-Agent"]
150-
# Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3
151-
tuf_user_agent = (
152-
"tuf/" + tuf.__version__ + " " + requests_user_agent
153-
)
154-
session.headers.update(
155-
{
156-
# Tell the server not to compress or modify anything.
157-
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives
158-
"Accept-Encoding": "identity",
159-
# The TUF user agent.
160-
"User-Agent": tuf_user_agent,
161-
}
162-
)
134+
ua = f"tuf/{tuf.__version__} {session.headers['User-Agent']}"
135+
session.headers["User-Agent"] = ua
163136

164137
logger.debug("Made new session %s", session_index)
165-
166138
else:
167139
logger.debug("Reusing session %s", session_index)
168140

0 commit comments

Comments
 (0)