Skip to content

Commit

Permalink
Adding ural.utils.add_get_param
Browse files Browse the repository at this point in the history
Related to #155
  • Loading branch information
Yomguithereal committed Apr 12, 2023
1 parent bc21ffb commit 4df10d9
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 23 deletions.
45 changes: 29 additions & 16 deletions scripts/get_param_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,65 @@
from ural.utils import safe_urlsplit
from urllib.parse import urlunsplit


def add_param_by_splitting(url, key, value):
    """Append `key=value` to the query string of `url` using plain string splits.

    Args:
        url (str): Url to extend. It may or may not already contain a query
            string and/or a fragment.
        key: Name of the parameter, formatted with `str.format`.
        value: Value of the parameter, formatted with `str.format`.

    Returns:
        str: The url with the parameter added after any existing query and
        before the fragment, if there is one.

    Note:
        `key` and `value` are inserted verbatim, without percent-encoding.
    """
    param = "{}={}".format(key, value)

    # The fragment starts at the first "#" and the query at the first "?"
    # located before it, hence `partition` rather than `rsplit`.
    url, hash_sep, fragment = url.partition("#")
    url, _, query = url.partition("?")

    # A missing query and an empty one ("http://host?") are handled alike:
    # the new parameter becomes the whole query.
    query = query + "&" + param if query else param

    # `hash_sep` is "" when the url had no fragment, so nothing is appended.
    return url + "?" + query + hash_sep + fragment


def add_param_by_urlsplit(url, key, value):
    """Append `key=value` to the query string of `url` via split/unsplit.

    The url is split with `safe_urlsplit`, the query component is extended,
    and the parts are reassembled with `urlunsplit`.

    Args:
        url (str): Url to extend.
        key: Name of the parameter, formatted with `str.format`.
        value: Value of the parameter, formatted with `str.format`.

    Returns:
        str: The reassembled url carrying the additional parameter.
    """
    param = "{}={}".format(key, value)

    splitted = safe_urlsplit(url)

    query = splitted.query

    if query:
        query += "&" + param
    else:
        query = param

    # Reassemble with the *updated* query. Passing `splitted.query` here
    # would silently drop the parameter that was just appended.
    return urlunsplit(
        (
            splitted.scheme,
            splitted.netloc,
            splitted.path,
            query,
            splitted.fragment,
        )
    )


# Number of calls timed for each strategy.
N = 100_000

# NOTE(review): `Timer` is not defined in the visible part of this script;
# presumably it is imported on the first line of the file -- confirm.
with Timer("split"):
    for _ in range(N):
        add_param_by_splitting(
            "http://www.lemonde.fr/path/to/article.html?test#ok", "hello", "world"
        )

# Use a distinct label so the two measurements can be told apart.
with Timer("urlsplit"):
    for _ in range(N):
        add_param_by_urlsplit(
            "http://www.lemonde.fr/path/to/article.html?test#ok", "hello", "world"
        )
2 changes: 1 addition & 1 deletion test/urls_from_text_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
"This is a link: [https://youtu.be/rLZ2ZzoD-W0](https://youtu.be/rLZ2ZzoD-W0?fbclid=IwAR3RdUNf4_yyYxIBbAspDj-86ckbpS9gjv3tn2rhYspmFJuSl_dlkD7AgyU)",
[
"https://youtu.be/rLZ2ZzoD-W0",
"https://youtu.be/rLZ2ZzoD-W0?fbclid=IwAR3RdUNf4_yyYxIBbAspDj-86ckbpS9gjv3tn2rhYspmFJuSl_dlkD7AgyU"
"https://youtu.be/rLZ2ZzoD-W0?fbclid=IwAR3RdUNf4_yyYxIBbAspDj-86ckbpS9gjv3tn2rhYspmFJuSl_dlkD7AgyU",
],
),
]
Expand Down
37 changes: 36 additions & 1 deletion test/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@
# =============================================================================
from __future__ import unicode_literals

from ural.utils import pathsplit, urlpathsplit, decode_punycode_hostname, safe_urlsplit
from ural.utils import (
pathsplit,
urlpathsplit,
decode_punycode_hostname,
safe_urlsplit,
add_get_param,
)

PATHSPLIT_TESTS = [
("", []),
Expand Down Expand Up @@ -34,3 +40,32 @@ def test_decode_punycode_hostname(self):
== "business.télérama.fr"
)
assert decode_punycode_hostname("xN--tlrama-bvab.fr") == "télérama.fr"

def test_add_get_param(self):
    # Each case pairs the positional arguments given to `add_get_param`
    # with the url it is expected to return.
    cases = [
        (("http://lemonde.fr", "test"), "http://lemonde.fr?test"),
        (("http://lemonde.fr", "test", "val"), "http://lemonde.fr?test=val"),
        (
            ("http://lemonde.fr#anchor", "test", "val"),
            "http://lemonde.fr?test=val#anchor",
        ),
        (
            ("http://lemonde.fr?hello=world#anchor", "test", "val"),
            "http://lemonde.fr?hello=world&test=val#anchor",
        ),
        (
            ("http://lemonde.fr?hello=world", "test", "val"),
            "http://lemonde.fr?hello=world&test=val",
        ),
        (
            ("http://lemonde.fr?hello=world&one=two", "test", "val"),
            "http://lemonde.fr?hello=world&one=two&test=val",
        ),
        (
            ("http://lemonde.fr?hello&one=two", "test", "val"),
            "http://lemonde.fr?hello&one=two&test=val",
        ),
        (("lemonde.fr", "test", "val"), "lemonde.fr?test=val"),
        (("lemonde.fr", "test", 45), "lemonde.fr?test=45"),
    ]

    for args, expected in cases:
        assert add_get_param(*args) == expected
2 changes: 1 addition & 1 deletion ural/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@
)
from ural.urls_from_text import urls_from_text
from ural.urls_from_html import urls_from_html
from ural.utils import safe_urlsplit, urlpathsplit, pathsplit
from ural.utils import safe_urlsplit, urlpathsplit, pathsplit, add_get_param
6 changes: 3 additions & 3 deletions ural/urls_from_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ def urls_from_text(string):
url = match.group(0)
s = match.start()

if s > 0 and string[s - 1] == '[':
if '](' in url:
remainder, url = url.split('](', 1)
if s > 0 and string[s - 1] == "[":
if "](" in url:
remainder, url = url.split("](", 1)
yield remainder.strip()

last_punct = None
Expand Down
29 changes: 29 additions & 0 deletions ural/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,32 @@ def fix_common_query_mistakes(query):

def safe_parse_qs(query):
    """Run `parse_qs` on `query` once `fix_common_query_mistakes` has processed it."""
    fixed_query = fix_common_query_mistakes(query)
    return parse_qs(fixed_query)


def add_get_param(url, name, value=None):
    """Append a GET parameter to the query string of `url`.

    The parameter is placed after any existing query and before the
    fragment, if there is one.

    Args:
        url (str): Url to extend. It may or may not already contain a query
            string and/or a fragment.
        name (str): Name of the parameter to add.
        value (optional): Value of the parameter, formatted with
            `str.format`. When None, only the bare `name` is added
            (e.g. "?test"). Defaults to None.

    Returns:
        str: The url with the additional parameter.

    Note:
        `name` and `value` are inserted verbatim, without percent-encoding.
    """
    param = name if value is None else "{}={}".format(name, value)

    # The fragment starts at the first "#" and the query at the first "?"
    # located before it. Splitting on the *last* occurrence instead would
    # inject the parameter into the fragment of urls such as
    # "http://host#a#b".
    url, hash_sep, fragment = url.partition("#")
    url, _, query = url.partition("?")

    # A missing query and an empty one ("http://host?") are handled alike:
    # the new parameter becomes the whole query.
    if query:
        query += "&" + param
    else:
        query = param

    # `hash_sep` is "" when the url had no fragment, so nothing is appended;
    # an empty fragment ("http://host#") keeps its trailing "#".
    return url + "?" + query + hash_sep + fragment
3 changes: 2 additions & 1 deletion ural/utils.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Dict, List, overload
from typing import Optional, Union, Dict, List, overload
from urllib.parse import SplitResult
from ural.types import AnyUrlTarget, Literal

Expand All @@ -16,3 +16,4 @@ def decode_punycode_hostname(
) -> List[str]: ...
def fix_common_query_mistakes(query: str) -> str: ...
def safe_parse_qs(query: str) -> Dict[str, List[str]]: ...
# `value` is optional: the implementation defaults it to None, in which case
# only the bare parameter name is appended.
def add_get_param(url: str, name: str, value: Optional[Union[str, int]] = ...) -> str: ...

0 comments on commit 4df10d9

Please sign in to comment.