Skip to content

Commit

Permalink
fix: automatically convert base_url from file url
Browse files Browse the repository at this point in the history
The base url for prepend_url had undefined behavior if the url pointed
to a file (i.e., did not end with a `/`). This is now detected and the
base url is converted to a directory path by appending a trailing `/`:
`http://www.domain.com/path/to` -> `http://www.domain.com/path/to/`

This was necessary so the appended url would be added to the path.
`http://www.domain.com/path/to/` + `new_path` ->
`http://www.domain.com/path/to/new_path` instead of
`http://www.domain.com/path/tonew_path`.

alandtse/alexa_media_player#2111
  • Loading branch information
alandtse committed Nov 29, 2023
1 parent 3d38f80 commit 08a396d
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 33 deletions.
7 changes: 5 additions & 2 deletions authcaptureproxy/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,19 +183,22 @@ def prepend_url(base_url: URL, url: URL, encoded: bool = False) -> URL:
"""Prepend the url.
Args:
base_url (URL): Base URL to prepend
base_url (URL): Base URL to prepend. The URL must end with a "/" so it's a folder or domain.
url (URL): url to prepend
encoded (bool): Whether to treat the url as already encoded. This may be needed if the url is JavaScript.
"""
if isinstance(base_url, str):
base_url = URL(base_url)
if isinstance(url, str):
url = URL(url)
if base_url.name:
_LOGGER.warn("Base URL is to file %s, treating as path", base_url.name)
base_url = base_url.with_path(f"{base_url.path}/")
if not url.is_absolute():
query = url.query
path = url.path
return base_url.with_path(
re.sub(r"/+", "/", f"{base_url.path}{path}"), encoded=encoded
re.sub(r"/+", "/", f"{base_url.path}/{path}"), encoded=encoded
).with_query(query)
return url

Expand Down
66 changes: 35 additions & 31 deletions tests/examples/test_modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
HOST_URL = "https://www.host.com"
HOST_URL_BACKSLASH = "https://www.host.com/"
HOST_URL_WITH_PATH = "https://www.host.com/auth/path/test?attr=asdf"
HOST_URL_WITH_PATH_BACKSLASH = "https://www.host.com/auth/path/test/?attr=asdf"
RELATIVE_URLS = ["asdf/asbklahef", "/root/asdf/b", "/root/asdf/b/", "asdf/asbklahef/", "asdf/"]
ABSOLUTE_URLS = [
PROXY_URL,
Expand Down Expand Up @@ -203,38 +204,41 @@ async def test_replace_empty_action_urls_empty():
@pytest.mark.asyncio
async def test_prepend_relative_urls():
"""Test prepend_relative_urls."""
start_url = random.choice(RELATIVE_URLS) # nosec
for form in [
FORM,
FORM_WITH_DATA,
FORM_NO_NAME,
FORM_NO_ID,
build_random_html(url=start_url),
FORM_WITH_EMPTY_ACTION,
FORM_WITH_VERIFY_ACTION,
]:
for url in [
HOST_URL,
HOST_URL_WITH_PATH,
PROXY_URL,
PROXY_URL_WITH_PATH,
HOST_URL_BACKSLASH,
for start_url in RELATIVE_URLS:
for form in [
FORM,
FORM_WITH_DATA,
FORM_NO_NAME,
FORM_NO_ID,
build_random_html(url=start_url),
FORM_WITH_EMPTY_ACTION,
FORM_WITH_VERIFY_ACTION,
]:
result = await modifiers.prepend_relative_urls(url, form)
old_soup = bs(form, "html.parser")
soup = bs(result, "html.parser")
for tag, attribute in KNOWN_URLS_ATTRS.items():
if old_soup.find(tag) and old_soup.find(tag).get(attribute) is not None:
old_url = old_soup.find(tag).get(attribute)
new_url = soup.find(tag).get(attribute)
if URL(old_url).is_absolute():
assert new_url != old_url
assert start_url == old_url
else:
assert URL(new_url).is_absolute()
assert new_url.startswith(str(URL(url).with_query({})))
if old_url:
assert new_url.endswith(old_url)
for url in [
HOST_URL,
HOST_URL_WITH_PATH,
PROXY_URL,
PROXY_URL_WITH_PATH,
HOST_URL_BACKSLASH,
HOST_URL_WITH_PATH_BACKSLASH,
PROXY_URL_WITH_PATH_BACKSLASH,
]:
result = await modifiers.prepend_relative_urls(url, form)
old_soup = bs(form, "html.parser")
soup = bs(result, "html.parser")
for tag, attribute in KNOWN_URLS_ATTRS.items():
if old_soup.find(tag) and old_soup.find(tag).get(attribute) is not None:
old_url = old_soup.find(tag).get(attribute)
new_url = soup.find(tag).get(attribute)
if URL(old_url).is_absolute():
assert new_url != old_url
assert start_url == old_url
else:
assert URL(new_url).is_absolute()
assert new_url.startswith(str(URL(url).parent))
if old_url:
assert new_url.endswith(old_url)
assert URL(new_url).name in URL(old_url).name


@pytest.mark.asyncio
Expand Down

0 comments on commit 08a396d

Please sign in to comment.