Skip to content

Commit

Permalink
Fixed BongaCams, links parsing improved (#297)
Browse files (browse the repository at this point in the history)
* Fixed BongaCams, links parsing improved

* Fixed tests
  • Loading branch information
soxoj authored Jan 8, 2022
1 parent 3e884d4 commit 5c05cfa
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 7 deletions.
12 changes: 9 additions & 3 deletions maigret/resources/data.json
Original file line number Diff line number Diff line change
Expand Up @@ -2144,11 +2144,17 @@
"cz",
"webcam"
],
"checkType": "status_code",
"absenceStrs": [
"<link rel=\"preconnect\""
],
"presenseStrs": [
"<title>Informa\u00e7\u00e3o e p\u00e1gina"
],
"checkType": "message",
"alexaRank": 30,
"urlMain": "https://pt.bongacams.com",
"url": "https://pt.bongacams.com/profile/{username}",
"usernameClaimed": "asuna-black",
"usernameClaimed": "Icehotangel",
"usernameUnclaimed": "noonewouldeverusethis77777"
},
"Bookandreader": {
Expand Down Expand Up @@ -13126,7 +13132,7 @@
"checkType": "response_url",
"alexaRank": 82345,
"urlMain": "http://sprashivai.ru",
"url": "http://sprashivai.ru/{username}?sl",
"url": "http://sprashivai.ru/{username}",
"usernameClaimed": "red",
"usernameUnclaimed": "noonewouldeverusethis7"
},
Expand Down
4 changes: 2 additions & 2 deletions maigret/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def enrich_link_str(link: str) -> str:


class URLMatcher:
_HTTP_URL_RE_STR = "^https?://(www.)?(.+)$"
_HTTP_URL_RE_STR = "^https?://(www.|m.)?(.+)$"
HTTP_URL_RE = re.compile(_HTTP_URL_RE_STR)
UNSAFE_SYMBOLS = ".?"

Expand All @@ -66,7 +66,7 @@ def make_profile_url_regexp(self, url: str, username_regexp: str = ""):
)
regexp_str = self._HTTP_URL_RE_STR.replace("(.+)", url_regexp)

return re.compile(regexp_str)
return re.compile(regexp_str, re.IGNORECASE)


def ascii_data_display(data: str) -> Any:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_sites.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_site_url_detector():

assert (
db.sites[0].url_regexp.pattern
== r'^https?://(www.)?forum\.amperka\.ru/members/\?username=(.+?)$'
== r'^https?://(www.|m.)?forum\.amperka\.ru/members/\?username=(.+?)$'
)
assert (
db.sites[0].detect_username('http://forum.amperka.ru/members/?username=test')
Expand Down
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_url_make_profile_url_regexp():
# ensure all combinations match pattern
assert (
URLMatcher.make_profile_url_regexp(url).pattern
== r'^https?://(www.)?flickr\.com/photos/(.+?)$'
== r'^https?://(www.|m.)?flickr\.com/photos/(.+?)$'
)


Expand Down

0 comments on commit 5c05cfa

Please sign in to comment.