Skip to content

Commit a575bb0

Browse files
committed
post-request session validity checking
Now that the session is persistent between runs, the assumption that a live session will always also be valid no longer holds: a session can go stale, and the backend will only let us know about it after a request has been made with that session. If a request requires a live and authenticated session but the current one is stale, the backend will not return the expected data; instead, it gives a generic response containing a specifically named span. Therefore, locating that span in the response serves as a good post-request session validity check.
1 parent 7e33b17 commit a575bb0

File tree

1 file changed

+50
-20
lines changed

1 file changed

+50
-20
lines changed

filmatyk/filmweb.py

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
ConnectionError = requests_html.requests.ConnectionError
1414

1515

16+
class UnauthenticatedError(ConnectionError):
17+
"""Raised by API functions if they detect the active session was refused."""
18+
19+
1620
class Constants():
1721
"""URLs and HTML component names for data acquisition.
1822
@@ -24,6 +28,7 @@ class Constants():
2428
item_class = 'userVotesPage__result'
2529
rating_source = 'userVotes'
2630
rating_stype = 'application/json'
31+
no_access_class = 'noResultsPlaceholder'
2732
movie_count_span = 'blockHeader__titleInfoCount'
2833
series_count_span = 'blockHeader__titleInfoCount'
2934
game_count_span = 'blockHeader__titleInfoCount'
@@ -70,9 +75,17 @@ def login(username, password):
7075
return (True, session)
7176

7277
def enforceSession(fun):
73-
"""Decorator to mark API functions that require a live session.
78+
"""Decorator to mark API functions that require an authenticated session.
79+
80+
This safeguards the calls to ensure they do not fail due to a lack of
81+
authentication with Filmweb. To achieve this goal, two checks are made:
82+
* before calling the decorated function, a check whether a live HTMLSession
83+
exists is made; if not, a login is requested,
84+
* the call itself is guarded against UnauthenticatedError, also resulting
85+
in a request for login and re-calling of the function.
86+
Additionally, session cookies are watched for changes, in order to set the
87+
isDirty flag in case that happens.
7488
75-
It will perform a session check before calling the actual function.
7689
Because it assumes that the first argument of the wrapped function is
7790
a bound FilmwebAPI instance ("self"), it shall only be used with FilmwebAPI
7891
methods.
@@ -82,21 +95,30 @@ def enforceSession(fun):
8295
https://stackoverflow.com/q/21382801/6919631
8396
https://stackoverflow.com/q/11058686/6919631
8497
The bottom line is that it should NEVER be called directly.
85-
86-
Also checks if the session cookies were changed in the process of making
87-
a request.
8898
"""
8999
def wrapper(*args, **kwargs):
100+
# Extract the bound FilmwebAPI instance
90101
self = args[0]
91-
if self.checkSession():
92-
old_cookies = set(self.session.cookies.values())
93-
result = fun(*args, **kwargs)
94-
new_cookies = set(self.session.cookies.values())
95-
if old_cookies != new_cookies:
96-
self.isDirty = True
97-
return result
98-
else:
102+
# First check: for presence of a live session
103+
if not self.checkSession():
99104
return None
105+
old_cookies = set(self.session.cookies.values())
106+
# Second check: whether the call failed due to lack of authentication
107+
try:
108+
result = fun(*args, **kwargs)
109+
except UnauthenticatedError:
110+
# Request login and call again
111+
print('Session was stale! Requesting login...')
112+
self.requestSession()
113+
if not self.session:
114+
return None
115+
result = fun(*args, **kwargs)
116+
# Session change detection
117+
new_cookies = set(self.session.cookies.values())
118+
if old_cookies != new_cookies:
119+
self.isDirty = True
120+
# Finally the produced data is returned
121+
return result
100122
return wrapper
101123

102124
def __init__(self, login_handler, username:str=''):
@@ -156,11 +178,7 @@ def __cacheParsingRules(self, itemtype:str):
156178
self.parsingRules[itemtype] = pTree
157179

158180
def checkSession(self):
159-
"""Check if there exists a live session and acquire a new one if not.
160-
#TODO: now with improved session handling we need something smarter
161-
(cause we'll nearly always have a session, except it might sometimes get stale
162-
resulting in an acquisition failure)
163-
"""
181+
"""Check if there exists a session instance and acquire a new one if not."""
164182
session_requested = False
165183
if not self.session:
166184
self.requestSession()
@@ -254,7 +272,13 @@ def getItemsPage(self, itemtype:str, page:int=1):
254272

255273
@enforceSession
256274
def fetchPage(self, url):
257-
"""Fetch the page and return its BeautifulSoup representation."""
275+
"""Fetch the page and return its BeautifulSoup representation.
276+
277+
ConnectionError is raised in case of any failure to get HTML data or page
278+
status being not-ok after get.
279+
UnauthenticatedError is raised if the response contains a span indicating
280+
that the session used to obtain it is no longer valid.
281+
"""
258282
try:
259283
page = self.session.get(url)
260284
except:
@@ -264,7 +288,13 @@ def fetchPage(self, url):
264288
print("FETCH ERROR {}".format(status))
265289
raise ConnectionError
266290
else:
267-
return BS(page.html.html, 'lxml')
291+
bspage = BS(page.html.html, 'lxml')
292+
# If a request required an active session but the one we had happened to be
293+
# stale, this magical span will be found in the page data:
294+
span = bspage.find('span', attrs={'class': self.constants.no_access_class})
295+
if span:
296+
raise UnauthenticatedError
297+
return bspage
268298

269299
def parsePage(self, page, itemtype:str):
270300
"""Parse items and ratings, returning constructed Item objects."""

0 commit comments

Comments
 (0)