Skip to content

bugs: fix proxy #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,15 @@ Explore the full capabilities of TLS Requests in the documentation:


Read the documentation: [**thewebscraping.github.io/tls-requests/**](https://thewebscraping.github.io/tls-requests/)

**Report Issues**
-----------------

Found a bug? Please [open an issue](https://github.com/thewebscraping/tls-requests/issues/).

By reporting an issue, you help improve the project.

**Credits**
-----------------

Special thanks to [bogdanfinn](https://github.com/bogdanfinn/) for creating the awesome [tls-client](https://github.com/bogdanfinn/tls-client).
49 changes: 49 additions & 0 deletions tests/test_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import tls_requests


def test_http_proxy():
    """A plain ``http://`` proxy URL exposes its scheme, host, port and URL."""
    http_proxy = tls_requests.Proxy("http://localhost:8080")
    # The port is compared against a string, not an int.
    parts = (http_proxy.scheme, http_proxy.host, http_proxy.port)
    assert parts == ("http", "localhost", "8080")
    assert http_proxy.url == "http://localhost:8080"


def test_https_proxy():
    """An ``https://`` proxy URL exposes its scheme, host, port and URL."""
    https_proxy = tls_requests.Proxy("https://localhost:8080")
    # The port is compared against a string, not an int.
    parts = (https_proxy.scheme, https_proxy.host, https_proxy.port)
    assert parts == ("https", "localhost", "8080")
    assert https_proxy.url == "https://localhost:8080"


def test_socks5_proxy():
    """A ``socks5://`` proxy URL exposes its scheme, host, port and URL."""
    socks_proxy = tls_requests.Proxy("socks5://localhost:8080")
    # The port is compared against a string, not an int.
    parts = (socks_proxy.scheme, socks_proxy.host, socks_proxy.port)
    assert parts == ("socks5", "localhost", "8080")
    assert socks_proxy.url == "socks5://localhost:8080"


def test_proxy_with_params():
    """Neither an inline query string nor ``params`` shows up in ``proxy.url``."""
    extra_params = {"foo": "bar"}
    proxy_with_query = tls_requests.Proxy("http://localhost:8080?a=b", params=extra_params)
    parts = (proxy_with_query.scheme, proxy_with_query.host, proxy_with_query.port)
    assert parts == ("http", "localhost", "8080")
    # The expected URL carries no "?a=b" and no "foo=bar".
    assert proxy_with_query.url == "http://localhost:8080"


def test_auth_proxy():
    """Credentials embedded in the proxy URL are exposed via ``auth`` and kept in ``url``."""
    authed_proxy = tls_requests.Proxy("http://username:password@localhost:8080")
    parts = (authed_proxy.scheme, authed_proxy.host, authed_proxy.port)
    assert parts == ("http", "localhost", "8080")
    # ``auth`` is compared against a (user, password) tuple.
    assert authed_proxy.auth == ("username", "password")
    assert authed_proxy.url == "http://username:password@localhost:8080"


def test_unsupported_proxy_scheme():
    """Building a proxy from a URL with an unknown scheme must raise ``ProxyError``.

    The test fails both when a different exception type is raised and when
    no exception is raised at all.
    """
    try:
        tls_requests.Proxy("unknown://localhost:8080")
    except Exception as exc:
        assert isinstance(exc, tls_requests.exceptions.ProxyError)
    else:
        # Without this branch the test would pass silently if the constructor
        # accepted the unsupported scheme, so it could never catch a regression.
        raise AssertionError(
            "tls_requests.Proxy accepted an unsupported scheme without raising ProxyError"
        )
12 changes: 12 additions & 0 deletions tests/test_redirects.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def test_missing_host_redirects(httpserver: HTTPServer):
httpserver.expect_request("/redirects/ok").respond_with_data(b"OK")
response = tls_requests.get(httpserver.url_for("/redirects/3"))
assert response.status_code == 200
assert response.history[0].status_code == 302
assert len(response.history) == 3


Expand All @@ -20,9 +21,20 @@ def test_full_path_redirects(httpserver: HTTPServer):
httpserver.expect_request("/redirects/ok").respond_with_data(b"OK")
response = tls_requests.get(httpserver.url_for("/redirects/3"))
assert response.status_code == 200
assert response.history[0].status_code == 302
assert len(response.history) == 3


def test_fragment_redirects(httpserver: HTTPServer):
    """A 302 whose Location carries ``#fragment`` is followed and the fragment is kept."""
    # First hop: /redirects/3 answers 302 pointing at an absolute URL with a fragment.
    redirect_target = httpserver.url_for("/redirects/ok#fragment")
    httpserver.expect_request("/redirects/3").respond_with_data(
        b"OK",
        status=302,
        headers={"Location": redirect_target},
    )
    # Final hop: the redirect target answers with a plain response.
    httpserver.expect_request("/redirects/ok").respond_with_data(b"OK")

    start_url = httpserver.url_for("/redirects/3")
    response = tls_requests.get(start_url)

    assert response.status_code == 200
    assert response.history[0].status_code == 302
    assert len(response.history) == 1
    assert response.request.url.fragment == "fragment"


def test_too_many_redirects(httpserver: HTTPServer):
httpserver.expect_request("/redirects/3").respond_with_data(b"OK", status=302, headers={"Location": "/redirects/1"})
httpserver.expect_request("/redirects/1").respond_with_data(b"OK", status=302, headers={"Location": "/redirects/2"})
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion tls_requests/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
__url__ = "https://github.com/thewebscraping/tls-requests"
__author__ = "Tu Pham"
__author_email__ = "thetwofarm@gmail.com"
__version__ = "1.0.6"
__version__ = "1.0.7"
__license__ = "MIT"
37 changes: 17 additions & 20 deletions tls_requests/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import (Any, Callable, Literal, Mapping, Optional, Sequence,
TypeVar, Union)

from .exceptions import RemoteProtocolError, TooManyRedirects
from .exceptions import ProxyError, RemoteProtocolError, TooManyRedirects
from .models import (URL, Auth, BasicAuth, Cookies, Headers, Proxy, Request,
Response, StatusCodes, TLSClient, TLSConfig, URLParams)
from .settings import (DEFAULT_FOLLOW_REDIRECTS, DEFAULT_HEADERS,
Expand Down Expand Up @@ -105,7 +105,7 @@ def __init__(
self._headers = Headers(headers)
self._hooks = hooks if isinstance(hooks, dict) else {}
self.auth = auth
self.proxy = Proxy(url=proxy) if isinstance(proxy, (str, URL)) else proxy
self.proxy = self.prepare_proxy(proxy)
self.timeout = timeout
self.follow_redirects = follow_redirects
self.max_redirects = max_redirects
Expand Down Expand Up @@ -194,27 +194,23 @@ def prepare_params(self, params: URLParamTypes = None) -> URLParams:
merged_params = self.params.copy()
return merged_params.update(params)

def prepare_proxy(self, proxy: ProxyTypes = None) -> Optional[Proxy]:
    """Turn a user-supplied proxy value into a ``Proxy`` instance.

    Returns ``None`` when ``proxy`` is ``None``. A ``bytes``, ``str``,
    ``URL`` or ``Proxy`` value is passed to the ``Proxy`` constructor.

    Raises:
        ProxyError: if ``proxy`` is any other type.
    """
    if proxy is None:
        return None

    if not isinstance(proxy, (bytes, str, URL, Proxy)):
        raise ProxyError("Invalid proxy.")

    return Proxy(proxy)

def prepare_config(self, request: Request):
"""Prepare TLS Config"""

proxy = None
if self.proxy and isinstance(self.proxy, Proxy):
proxy = self.proxy.url
if self.proxy.auth:
proxy = "%s://%s@%s:%s" % (
self.proxy.url.scheme,
":".join(self.proxy.auth),
self.proxy.url.host,
self.proxy.url.port,
)

config = self.config.copy_with(
method=request.method,
url=str(request.url),
url=request.url,
body=request.read(),
headers=dict(request.headers),
cookies=[dict(name=k, value=v) for k, v in request.cookies.items()],
proxy=proxy,
proxy=request.proxy.url if request.proxy else None,
timeout=request.timeout,
http2=True if self.http2 in ["auto", "http2", True, None] else False,
verify=self.verify,
Expand Down Expand Up @@ -249,6 +245,7 @@ def build_request(
params=self.prepare_params(params),
headers=self.prepare_headers(headers),
cookies=self.prepare_cookies(cookies),
proxy=self.proxy,
timeout=timeout or self.timeout,
)

Expand Down Expand Up @@ -313,15 +310,14 @@ def _rebuild_redirect_url(self, request: Request, response: Response) -> URL:
except KeyError:
raise RemoteProtocolError("Invalid URL in Location headers: %s" % e)

for missing_field in ["scheme", "host", "port", "fragment"]:
private_field = "_%s" % missing_field
if not getattr(url, private_field, None):
setattr(url, private_field, getattr(request.url, private_field, ""))
if not url.netloc:
for missing_field in ["scheme", "host", "port"]:
setattr(url, missing_field, getattr(request.url, missing_field, ""))

# TLS error transport between HTTP/1.x -> HTTP/2
if url.scheme != request.url.scheme:
if request.url.scheme == "http":
url._scheme = request.url.scheme
url.scheme = request.url.scheme
else:
if self.http2 in ["auto", None]:
self.session.destroy_session(self.config.sessionId)
Expand All @@ -331,6 +327,7 @@ def _rebuild_redirect_url(self, request: Request, response: Response) -> URL:
"Switching remote scheme from HTTP/2 to HTTP/1 is not supported. Please initialize Client with parameter `http2` to `auto`."
)

setattr(url, "_url", None) # reset url
if not url.url:
raise RemoteProtocolError("Invalid URL in Location headers: %s" % e)

Expand Down
8 changes: 5 additions & 3 deletions tls_requests/models/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from tls_requests.models.cookies import Cookies
from tls_requests.models.encoders import StreamEncoder
from tls_requests.models.headers import Headers
from tls_requests.models.urls import URL
from tls_requests.models.urls import URL, Proxy
from tls_requests.settings import DEFAULT_TIMEOUT
from tls_requests.types import (CookieTypes, HeaderTypes, MethodTypes,
RequestData, RequestFiles, TimeoutTypes,
URLParamTypes, URLTypes)
ProxyTypes, RequestData, RequestFiles,
TimeoutTypes, URLParamTypes, URLTypes)

__all__ = ["Request"]

Expand All @@ -24,13 +24,15 @@ def __init__(
params: URLParamTypes = None,
headers: HeaderTypes = None,
cookies: CookieTypes = None,
proxy: ProxyTypes = None,
timeout: TimeoutTypes = None,
) -> None:
self._content = None
self._session_id = None
self.url = URL(url, params=params)
self.method = method.upper()
self.cookies = Cookies(cookies)
self.proxy = Proxy(proxy) if proxy else None
self.timeout = timeout if isinstance(timeout, (float, int)) else DEFAULT_TIMEOUT
self.stream = StreamEncoder(data, files, json)
self.headers = self._prepare_headers(headers)
Expand Down
Loading
Loading