Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTTPS proxy support #1512

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,4 @@ Patches and Suggestions
- Kevin Burke <kev@inburke.com>
- Flavio Curella
- David Pursehouse <david.pursehouse@gmail.com> @dpursehouse
- Marc Schlaich @schlamar
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Features
- Multipart File Uploads
- Connection Timeouts
- Thread-safety
- HTTP(S) proxy support


Installation
Expand Down
13 changes: 9 additions & 4 deletions requests/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import socket

from .models import Response
from .packages.urllib3.poolmanager import PoolManager, ProxyManager
from .packages.urllib3.poolmanager import PoolManager, proxy_from_url
from .packages.urllib3.response import HTTPResponse
from .compat import urlparse, basestring, urldefrag, unquote
from .utils import (DEFAULT_CA_BUNDLE_PATH, get_encoding_from_headers,
Expand Down Expand Up @@ -71,6 +71,7 @@ def __init__(self, pool_connections=DEFAULT_POOLSIZE,
pool_block=DEFAULT_POOLBLOCK):
self.max_retries = max_retries
self.config = {}
self.proxy_manager = {}

super(HTTPAdapter, self).__init__()

Expand Down Expand Up @@ -190,11 +191,15 @@ def get_connection(self, url, proxies=None):
:param proxies: (optional) A Requests-style dictionary of proxies used on this request.
"""
proxies = proxies or {}
proxy = proxies.get(urlparse(url.lower()).scheme)
scheme = urlparse(url.lower()).scheme
proxy = proxies.get(scheme)

if proxy:
proxy = prepend_scheme_if_needed(proxy, urlparse(url.lower()).scheme)
conn = ProxyManager(self.poolmanager.connection_from_url(proxy))
proxy = prepend_scheme_if_needed(proxy, scheme)
if not proxy in self.proxy_manager:
self.proxy_manager[proxy] = proxy_from_url(proxy)

conn = self.proxy_manager[proxy].connection_from_url(url)
else:
conn = self.poolmanager.connection_from_url(url.lower())

Expand Down
76 changes: 60 additions & 16 deletions requests/packages/urllib3/connectionpool.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ def connect(self):
resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
resolved_ssl_version = resolve_ssl_version(self.ssl_version)

if self._tunnel_host:
self.sock = sock
# Calls self._set_hostport(), so self.host is
# self._tunnel_host below.
self._tunnel()

# Wrap socket using verification with the root certs in
# trusted_root_certs
self.sock = ssl_wrap_socket(sock, self.key_file, self.cert_file,
Expand Down Expand Up @@ -174,12 +180,20 @@ class HTTPConnectionPool(ConnectionPool, RequestMethods):
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.

:param _proxy:
Parsed proxy URL, should not be used directly, instead, see
:class:`urllib3.poolmanager.ProxyManager`

:param _proxy_headers:
A dictionary with proxy headers, should not be used directly,
instead, see :class:`urllib3.poolmanager.ProxyManager`
"""

scheme = 'http'

def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
block=False, headers=None):
block=False, headers=None, _proxy=None, _proxy_headers=None):
ConnectionPool.__init__(self, host, port)
RequestMethods.__init__(self, headers)

Expand All @@ -188,6 +202,9 @@ def __init__(self, host, port=None, strict=False, timeout=None, maxsize=1,
self.pool = self.QueueCls(maxsize)
self.block = block

self.proxy = _proxy
self.proxy_headers = _proxy_headers or {}

# Fill the queue up so that doing get() on it will block properly
for _ in xrange(maxsize):
self.pool.put(None)
Expand Down Expand Up @@ -526,13 +543,14 @@ class HTTPSConnectionPool(HTTPConnectionPool):
def __init__(self, host, port=None,
strict=False, timeout=None, maxsize=1,
block=False, headers=None,
_proxy=None, _proxy_headers=None,
key_file=None, cert_file=None, cert_reqs=None,
ca_certs=None, ssl_version=None,
assert_hostname=None, assert_fingerprint=None):

HTTPConnectionPool.__init__(self, host, port,
strict, timeout, maxsize,
block, headers)
block, headers, _proxy, _proxy_headers)
self.key_file = key_file
self.cert_file = cert_file
self.cert_reqs = cert_reqs
Expand All @@ -541,6 +559,34 @@ def __init__(self, host, port=None,
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint

def _prepare_conn(self, connection):
    """
    Configure ``connection`` with this pool's settings and return it.

    A :class:`VerifiedHTTPSConnection` receives the pool's certificate
    options and SSL version. When the pool has a proxy, the tunnel to
    ``self.host``/``self.port`` is registered and the connection is
    opened immediately.
    """
    if isinstance(connection, VerifiedHTTPSConnection):
        cert_options = dict(
            key_file=self.key_file,
            cert_file=self.cert_file,
            cert_reqs=self.cert_reqs,
            ca_certs=self.ca_certs,
            assert_hostname=self.assert_hostname,
            assert_fingerprint=self.assert_fingerprint,
        )
        connection.set_cert(**cert_options)
        connection.ssl_version = self.ssl_version

    # Nothing more to do for direct (non-proxied) connections.
    if self.proxy is None:
        return connection

    try:
        # Public name, available on Python 2.7+
        register_tunnel = connection.set_tunnel
    except AttributeError:  # Platform-specific: Python 2.6
        register_tunnel = connection._set_tunnel
    register_tunnel(self.host, self.port, self.proxy_headers)

    # Connect now so the tunnel is established early; otherwise httplib
    # would improperly set the Host: header to the proxy's IP:port.
    connection.connect()
    return connection

def _new_conn(self):
"""
Return a fresh :class:`httplib.HTTPSConnection`.
Expand All @@ -549,26 +595,24 @@ def _new_conn(self):
log.info("Starting new HTTPS connection (%d): %s"
% (self.num_connections, self.host))

actual_host = self.host
actual_port = self.port
if self.proxy is not None:
actual_host = self.proxy.host
actual_port = self.proxy.port

if not ssl: # Platform-specific: Python compiled without +ssl
if not HTTPSConnection or HTTPSConnection is object:
raise SSLError("Can't connect to HTTPS URL because the SSL "
"module is not available.")
connection_class = HTTPSConnection
else:
connection_class = VerifiedHTTPSConnection

return HTTPSConnection(host=self.host,
port=self.port,
strict=self.strict)
connection = connection_class(host=actual_host, port=actual_port,
strict=self.strict)

connection = VerifiedHTTPSConnection(host=self.host,
port=self.port,
strict=self.strict)
connection.set_cert(key_file=self.key_file, cert_file=self.cert_file,
cert_reqs=self.cert_reqs, ca_certs=self.ca_certs,
assert_hostname=self.assert_hostname,
assert_fingerprint=self.assert_fingerprint)

connection.ssl_version = self.ssl_version

return connection
return self._prepare_conn(connection)


def connection_from_url(url, **kw):
Expand Down
107 changes: 82 additions & 25 deletions requests/packages/urllib3/poolmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from ._collections import RecentlyUsedContainer
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connectionpool import connection_from_url, port_by_scheme
from .connectionpool import port_by_scheme
from .request import RequestMethods
from .util import parse_url

Expand Down Expand Up @@ -60,6 +60,8 @@ class PoolManager(RequestMethods):

"""

proxy = None

def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
RequestMethods.__init__(self, headers)
self.connection_pool_kw = connection_pool_kw
Expand Down Expand Up @@ -99,21 +101,23 @@ def connection_from_host(self, host, port=None, scheme='http'):
If ``port`` isn't given, it will be derived from the ``scheme`` using
``urllib3.connectionpool.port_by_scheme``.
"""

scheme = scheme or 'http'

port = port or port_by_scheme.get(scheme, 80)

pool_key = (scheme, host, port)

with self.pools.lock:
# If the scheme, host, or port doesn't match existing open connections,
# open a new ConnectionPool.
pool = self.pools.get(pool_key)
if pool:
return pool

# Make a fresh ConnectionPool of the desired type
pool = self._new_pool(scheme, host, port)
self.pools[pool_key] = pool
# If the scheme, host, or port doesn't match existing open
# connections, open a new ConnectionPool.
pool = self.pools.get(pool_key)
if pool:
return pool

# Make a fresh ConnectionPool of the desired type
pool = self._new_pool(scheme, host, port)
self.pools[pool_key] = pool
return pool

def connection_from_url(self, url):
Expand Down Expand Up @@ -145,7 +149,10 @@ def urlopen(self, method, url, redirect=True, **kw):
if 'headers' not in kw:
kw['headers'] = self.headers

response = conn.urlopen(method, u.request_uri, **kw)
if self.proxy is not None and u.scheme == "http":
response = conn.urlopen(method, url, **kw)
else:
response = conn.urlopen(method, u.request_uri, **kw)

redirect_location = redirect and response.get_redirect_location()
if not redirect_location:
Expand All @@ -164,15 +171,59 @@ def urlopen(self, method, url, redirect=True, **kw):
return self.urlopen(method, redirect_location, **kw)


class ProxyManager(RequestMethods):
class ProxyManager(PoolManager):
"""
Given a ConnectionPool to a proxy, the ProxyManager's ``urlopen`` method
will make requests to any url through the defined proxy. The ProxyManager
class will automatically set the 'Host' header if it is not provided.
Behaves just like :class:`PoolManager`, but sends all requests through
the defined proxy, using the CONNECT method for HTTPS URLs.

:param poxy_url:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small typo here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would say this is not an issue which needs to be solved for this PR. Can you send a PR to urllib3? :)

The URL of the proxy to be used.

:param proxy_headers:
A dictionary containing headers that will be sent to the proxy. In case
of HTTP they are being sent with each request, while in the
HTTPS/CONNECT case they are sent only once. Could be used for proxy
authentication.

Example:
>>> proxy = urllib3.ProxyManager('http://localhost:3128/')
>>> r1 = proxy.request('GET', 'http://google.com/')
>>> r2 = proxy.request('GET', 'http://httpbin.org/')
>>> len(proxy.pools)
1
>>> r3 = proxy.request('GET', 'https://httpbin.org/')
>>> r4 = proxy.request('GET', 'https://twitter.com/')
>>> len(proxy.pools)
3

"""

def __init__(self, proxy_pool):
self.proxy_pool = proxy_pool
def __init__(self, proxy_url, num_pools=10, headers=None,
proxy_headers=None, **connection_pool_kw):

if isinstance(proxy_url, HTTPConnectionPool):
proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
proxy_url.port)
proxy = parse_url(proxy_url)
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
assert self.proxy.scheme in ("http", "https"), \
'Not supported proxy scheme %s' % self.proxy.scheme
connection_pool_kw['_proxy'] = self.proxy
connection_pool_kw['_proxy_headers'] = self.proxy_headers
super(ProxyManager, self).__init__(
num_pools, headers, **connection_pool_kw)

def connection_from_host(self, host, port=None, scheme='http'):
    """
    Return the connection pool to use for a request to ``host``.

    For the ``https`` scheme the pool is looked up by the requested
    host, port and scheme. For any other scheme the requested target is
    ignored and the pool for the proxy's own host, port and scheme is
    returned instead.
    """
    lookup = super(ProxyManager, self).connection_from_host

    if scheme != "https":
        # Non-HTTPS requests all share the pool that points at the proxy.
        proxy = self.proxy
        host, port, scheme = proxy.host, proxy.port, proxy.scheme

    return lookup(host, port, scheme)

def _set_proxy_headers(self, url, headers=None):
"""
Expand All @@ -187,16 +238,22 @@ def _set_proxy_headers(self, url, headers=None):

if headers:
headers_.update(headers)

return headers_

def urlopen(self, method, url, **kw):
def urlopen(self, method, url, redirect=True, **kw):
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
kw['assert_same_host'] = False
kw['headers'] = self._set_proxy_headers(url, headers=kw.get('headers'))
return self.proxy_pool.urlopen(method, url, **kw)
u = parse_url(url)

if u.scheme == "http":
# It's too late to set proxy headers on per-request basis for
# tunnelled HTTPS connections, should use
# constructor's proxy_headers instead.
kw['headers'] = self._set_proxy_headers(url, kw.get('headers',
self.headers))
kw['headers'].update(self.proxy_headers)

return super(ProxyManager, self).urlopen(method, url, redirect, **kw)


def proxy_from_url(url, **pool_kw):
proxy_pool = connection_from_url(url, **pool_kw)
return ProxyManager(proxy_pool)
def proxy_from_url(url, **kw):
return ProxyManager(proxy_url=url, **kw)
2 changes: 1 addition & 1 deletion requests/packages/urllib3/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def read(self, amt=None, decode_content=None, cache_content=False):
"failed to decode it." % content_encoding,
e)

if flush_decoder and self._decoder:
if flush_decoder and decode_content and self._decoder:
buf = self._decoder.decompress(binary_type())
data += buf + self._decoder.flush()

Expand Down