Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better detection is_logged_in #473

Merged
merged 1 commit into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions tests/data/logged_in_failed_explicit.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>This is a test page</title>
</head>
<body>

Welcome. Failed to login, please try again.

</body>
</html>
12 changes: 12 additions & 0 deletions tests/data/logged_in_failed_implicit.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>This is a test page</title>
</head>
<body>

Welcome. Unfortunately, login didn't succeed

</body>
</html>
12 changes: 11 additions & 1 deletion tests/parsers/test_html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,17 @@ def test_base_other_links():

assert page.html_redirections == ["http://perdu.com/blog/adblock.html"]

def test_logged_in():
def test_logged_in_success():
    """The logged_in.html fixture must be reported as a logged-in page."""
    # Read the fixture with an explicit encoding so the test does not depend
    # on the platform's default locale.
    with open("tests/data/logged_in.html", encoding="utf-8") as data_body:
        body = data_body.read()

    page = Html(body, "http://perdu.com/index.php")
    assert page.is_logged_in()

def test_logged_in_failed_implicit():
    """A page without any disconnect marker must not be reported as logged in.

    The fixture contains a failure message that matches none of the known
    error phrases, so the negative result comes from the missing
    logout/disconnect text.
    """
    # The fixture declares UTF-8; read it with that encoding explicitly
    # instead of relying on the platform default.
    with open("tests/data/logged_in_failed_implicit.html", encoding="utf-8") as data_body:
        body = data_body.read()

    page = Html(body, "http://perdu.com/index.php")
    assert not page.is_logged_in()

def test_logged_in_failed_explicit():
    """A page showing an explicit login error must not be reported as logged in.

    The fixture text contains known error phrases ("try again").
    """
    # The fixture declares UTF-8; read it with that encoding explicitly
    # instead of relying on the platform default.
    with open("tests/data/logged_in_failed_explicit.html", encoding="utf-8") as data_body:
        body = data_body.read()

    page = Html(body, "http://perdu.com/index.php")
    assert not page.is_logged_in()
18 changes: 17 additions & 1 deletion wapitiCore/parsers/html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@
from wapitiCore.parsers.javascript import extract_js_redirections

DISCONNECT_REGEX = r'(?i)((log|sign)\s?(out|off)|disconnect|déconnexion)'
# Phrases that commonly show up on a page after a failed login attempt.
# The pattern is case-insensitive (like DISCONNECT_REGEX) so that capitalised
# messages such as "Invalid password" or "Access Denied" are detected too.
CONNECT_ERROR_REGEX = (
    r'(?i)(invalid|'
    r'authentication failed|'
    r'denied|'
    r'incorrect|'
    r'failed|'
    r'not found|'
    r'expired|'
    r'try again|'
    r'captcha|'
    r'two-factors|'
    r'verify your email|'
    r'erreur)'
)


def not_empty(original_function):
Expand Down Expand Up @@ -612,5 +624,9 @@ def extract_disconnect_urls(self) -> List[str]:
disconnect_urls.append(link)
return disconnect_urls

def is_logged_in(self):
def is_logged_in(self) -> bool:
    """Tell whether the page looks like it was served to an authenticated user.

    Returns False as soon as a text node matches CONNECT_ERROR_REGEX (a login
    error is displayed). Otherwise returns True only if a text node matches
    DISCONNECT_REGEX (a logout/disconnect marker is present).
    """
    # A visible login error takes precedence over any disconnect marker.
    login_error = self._soup.find(string=re.compile(CONNECT_ERROR_REGEX))
    if login_error is not None:
        return False

    # No error found: the page counts as logged in only when it offers a
    # way to log out.
    disconnect_marker = self._soup.find(string=re.compile(DISCONNECT_REGEX))
    return disconnect_marker is not None