From 46e293e94149a25f359a2879a836cf0aad475d6c Mon Sep 17 00:00:00 2001 From: Stephen Knox Date: Sat, 2 Jul 2022 23:02:39 +0100 Subject: [PATCH 1/4] Add cert parameter to http transport params --- help.txt | 2 ++ smart_open/http.py | 19 ++++++++++++++----- smart_open/tests/test_smart_open.py | 12 ++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/help.txt b/help.txt index 6d642bba9..b5e250ea0 100644 --- a/help.txt +++ b/help.txt @@ -124,6 +124,8 @@ FUNCTIONS The username for authenticating over HTTP password: str, optional The password for authenticating over HTTP + cert: str/tuple, optional + If String, path to ssl client cert file (.pem). If Tuple, (‘cert’, ‘key’) headers: dict, optional Any headers to send in the request. If ``None``, the default headers are sent: ``{'Accept-Encoding': 'identity'}``. To use no headers at all, diff --git a/smart_open/http.py b/smart_open/http.py index e4439bd12..b83765587 100644 --- a/smart_open/http.py +++ b/smart_open/http.py @@ -49,7 +49,7 @@ def open_uri(uri, mode, transport_params): return open(uri, mode, **kwargs) -def open(uri, mode, kerberos=False, user=None, password=None, headers=None, timeout=None): +def open(uri, mode, kerberos=False, user=None, password=None, cert= None, headers=None, timeout=None): """Implement streamed reader from a web site. Supports Kerberos and Basic HTTP authentication. @@ -66,6 +66,8 @@ def open(uri, mode, kerberos=False, user=None, password=None, headers=None, time The username for authenticating over HTTP password: str, optional The password for authenticating over HTTP + cert: str/tuple, optional + if String, path to ssl client cert file (.pem). If Tuple, (‘cert’, ‘key’) headers: dict, optional Any headers to send in the request. If ``None``, the default headers are sent: ``{'Accept-Encoding': 'identity'}``. To use no headers at all, @@ -80,7 +82,8 @@ def open(uri, mode, kerberos=False, user=None, password=None, headers=None, time if mode == constants.READ_BINARY: fobj = SeekableBufferedInputBase( uri, mode, kerberos=kerberos, - user=user, password=password, headers=headers, timeout=timeout, + user=user, password=password, cert=cert, + headers=headers, timeout=timeout, ) fobj.name = os.path.basename(urllib.parse.urlparse(uri).path) return fobj @@ -90,7 +93,8 @@ def open(uri, mode, kerberos=False, user=None, password=None, headers=None, time class BufferedInputBase(io.BufferedIOBase): def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE, - kerberos=False, user=None, password=None, headers=None, timeout=None): + kerberos=False, user=None, password=None, cert=None, + headers=None, timeout=None): if kerberos: import requests_kerberos auth = requests_kerberos.HTTPKerberosAuth() @@ -112,6 +116,7 @@ def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE, self.response = requests.get( url, auth=auth, + cert=cert, stream=True, headers=self.headers, timeout=self.timeout, @@ -204,13 +209,15 @@ def readinto(self, b): class SeekableBufferedInputBase(BufferedInputBase): """ Implement seekable streamed reader from a web site. - Supports Kerberos and Basic HTTP authentication. + Supports Kerberos, client certificate and Basic HTTP authentication. """ def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE, - kerberos=False, user=None, password=None, headers=None, timeout=None): + kerberos=False, user=None, password=None, cert=None, + headers=None, timeout=None): """ If Kerberos is True, will attempt to use the local Kerberos credentials. + If cert is set, will try to use a client certificate Otherwise, will try to use "basic" HTTP authentication via username/password. If none of those are set, will connect unauthenticated. @@ -230,6 +237,7 @@ def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE, else: self.headers = headers + self.cert = cert self.timeout = timeout self.buffer_size = buffer_size @@ -325,6 +333,7 @@ def _partial_request(self, start_pos=None): self.url, auth=self.auth, stream=True, + cert=self.cert, headers=self.headers, timeout=self.timeout, ) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index 5434c160f..bbe953304 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -448,6 +448,18 @@ def test_http_pass(self): self.assertTrue('Authorization' in actual_request.headers) self.assertTrue(actual_request.headers['Authorization'].startswith('Basic ')) + @responses.activate + def test_http_cert(self): + """Does http authentication work correctly""" + responses.add(responses.GET, "http://127.0.0.1/index.html", + body='line1\nline2', stream=True) + cert_path = '/path/to/my/cert.pem' + tp = dict(cert=cert_path) + smart_open.open("http://127.0.0.1/index.html", transport_params=tp) + self.assertEqual(len(responses.calls), 1) + actual_request = responses.calls[0].request + self.assertEqual(cert_path, actual_request.req_kwargs['cert']) + @responses.activate def _test_compressed_http(self, suffix, query): """Can open via http?""" From 3ccd038cce846bea2b2723f436433a023377265e Mon Sep 17 00:00:00 2001 From: Stephen Knox Date: Mon, 4 Jul 2022 09:26:19 +0100 Subject: [PATCH 2/4] Fix whitespace --- smart_open/http.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/smart_open/http.py b/smart_open/http.py index b83765587..4d586fce2 100644 --- a/smart_open/http.py +++ b/smart_open/http.py @@ -49,7 +49,8 @@ def open_uri(uri, mode, transport_params): return open(uri, mode, **kwargs) -def open(uri, mode, kerberos=False, user=None, password=None, cert= None, headers=None, timeout=None): +def open(uri, mode, kerberos=False, user=None, password=None, cert= None, + headers=None, timeout=None): """Implement streamed reader from a web site. Supports Kerberos and Basic HTTP authentication. @@ -82,7 +83,7 @@ def open(uri, mode, kerberos=False, user=None, password=None, cert= None, header if mode == constants.READ_BINARY: fobj = SeekableBufferedInputBase( uri, mode, kerberos=kerberos, - user=user, password=password, cert=cert, + user=user, password=password, cert=cert, headers=headers, timeout=timeout, ) fobj.name = os.path.basename(urllib.parse.urlparse(uri).path) @@ -93,7 +94,7 @@ def open(uri, mode, kerberos=False, user=None, password=None, cert= None, header class BufferedInputBase(io.BufferedIOBase): def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE, - kerberos=False, user=None, password=None, cert=None, + kerberos=False, user=None, password=None, cert=None, headers=None, timeout=None): if kerberos: import requests_kerberos @@ -213,7 +214,7 @@ class SeekableBufferedInputBase(BufferedInputBase): """ def __init__(self, url, mode='r', buffer_size=DEFAULT_BUFFER_SIZE, - kerberos=False, user=None, password=None, cert=None, + kerberos=False, user=None, password=None, cert=None, headers=None, timeout=None): """ If Kerberos is True, will attempt to use the local Kerberos credentials. From 936a783b01f931416bb17a3666736e804cd16809 Mon Sep 17 00:00:00 2001 From: Stephen Knox Date: Mon, 4 Jul 2022 12:24:42 +0100 Subject: [PATCH 3/4] More whitespace --- smart_open/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smart_open/http.py b/smart_open/http.py index 4d586fce2..e6fe2aeea 100644 --- a/smart_open/http.py +++ b/smart_open/http.py @@ -49,7 +49,7 @@ def open_uri(uri, mode, transport_params): return open(uri, mode, **kwargs) -def open(uri, mode, kerberos=False, user=None, password=None, cert= None, +def open(uri, mode, kerberos=False, user=None, password=None, cert=None, headers=None, timeout=None): """Implement streamed reader from a web site. From 0760f6dc97a70b35072d850a47b5d86bf6039556 Mon Sep 17 00:00:00 2001 From: Stephen Knox Date: Mon, 4 Jul 2022 12:48:56 +0100 Subject: [PATCH 4/4] Update test description --- smart_open/tests/test_smart_open.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index bbe953304..367e9948c 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -450,7 +450,7 @@ def test_http_pass(self): @responses.activate def test_http_cert(self): - """Does http authentication work correctly""" + """Does cert parameter get passed to requests""" responses.add(responses.GET, "http://127.0.0.1/index.html", body='line1\nline2', stream=True) cert_path = '/path/to/my/cert.pem'