13
13
# Alias the ConnectionError class exposed as requests_html.requests.ConnectionError.
# NOTE: from this point on the module-level name shadows Python's builtin
# ConnectionError, so every `except ConnectionError` / `raise ConnectionError`
# in this module refers to this class rather than the builtin.
ConnectionError = requests_html .requests .ConnectionError
14
14
15
15
16
class UnauthenticatedError(ConnectionError):
    """Signal that the active session was refused.

    API functions raise this when they detect that the session they used is
    no longer accepted. Because it derives from ConnectionError, handlers
    that catch ConnectionError will catch it as well.
    """
18
+
19
+
16
20
class Constants ():
17
21
"""URLs and HTML component names for data acquisition.
18
22
@@ -24,6 +28,7 @@ class Constants():
24
28
item_class = 'userVotesPage__result'
25
29
rating_source = 'userVotes'
26
30
rating_stype = 'application/json'
31
+ no_access_class = 'noResultsPlaceholder'
27
32
movie_count_span = 'blockHeader__titleInfoCount'
28
33
series_count_span = 'blockHeader__titleInfoCount'
29
34
game_count_span = 'blockHeader__titleInfoCount'
@@ -70,9 +75,17 @@ def login(username, password):
70
75
return (True , session )
71
76
72
77
def enforceSession(fun):
    """Decorator to mark API functions that require an authenticated session.

    This safeguards the calls to ensure they do not fail due to a lack of
    authentication with Filmweb. To achieve this goal, two checks are made:
    * before calling the decorated function, self.checkSession() is consulted;
      if it reports failure, the call is abandoned and None is returned,
    * the call itself is guarded against UnauthenticatedError; when caught,
      self.requestSession() is invoked and the function is called once more.
      If no session exists after that request, None is returned. The second
      call is not guarded: another UnauthenticatedError propagates to the
      caller.
    Additionally, session cookie values are compared before and after the
    call, and self.isDirty is set to True when they differ.

    Because it assumes that the first positional argument of the wrapped
    function is a bound FilmwebAPI instance ("self"), it shall only be used
    with FilmwebAPI methods.

    See also:
        https://stackoverflow.com/q/21382801/6919631
        https://stackoverflow.com/q/11058686/6919631
    The bottom line is that it should NEVER be called directly.

    The returned wrapper keeps the name and docstring of the decorated
    function (via functools.wraps).
    """
    # Local import keeps this block self-contained regardless of what the
    # module imports at file level.
    import functools

    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        # Extract the bound FilmwebAPI instance
        self = args[0]
        # First check: for presence of a live session
        if not self.checkSession():
            return None
        # Snapshot taken before the call so that a re-login below also
        # registers as a cookie change.
        old_cookies = set(self.session.cookies.values())
        # Second check: whether the call failed due to lack of authentication
        try:
            result = fun(*args, **kwargs)
        except UnauthenticatedError:
            # Request login and call again
            print('Session was stale! Requesting login...')
            self.requestSession()
            if not self.session:
                return None
            result = fun(*args, **kwargs)
        # Session change detection
        new_cookies = set(self.session.cookies.values())
        if old_cookies != new_cookies:
            self.isDirty = True
        # Finally the produced data is returned
        return result
    return wrapper
101
123
102
124
def __init__ (self , login_handler , username :str = '' ):
@@ -156,11 +178,7 @@ def __cacheParsingRules(self, itemtype:str):
156
178
self .parsingRules [itemtype ] = pTree
157
179
158
180
def checkSession (self ):
159
- """Check if there exists a live session and acquire a new one if not.
160
- #TODO: now with improved session handling we need something smarter
161
- (cause we'll nearly always have a session, except it might sometimes get stale
162
- resulting in an acquisition failure)
163
- """
181
+ """Check if there exists a session instance and acquire a new one if not."""
164
182
session_requested = False
165
183
if not self .session :
166
184
self .requestSession ()
@@ -254,7 +272,13 @@ def getItemsPage(self, itemtype:str, page:int=1):
254
272
255
273
@enforceSession
256
274
def fetchPage (self , url ):
257
- """Fetch the page and return its BeautifulSoup representation."""
275
+ """Fetch the page and return its BeautifulSoup representation.
276
+
277
+ ConnectionError is raised in case of any failure to get HTML data or page
278
+ status being not-ok after get.
279
+ UnauthenticatedError is raised if the response contains a span indicating
280
+ that the session used to obtain it is no longer valid.
281
+ """
258
282
try :
259
283
page = self .session .get (url )
260
284
except :
@@ -264,7 +288,13 @@ def fetchPage(self, url):
264
288
print ("FETCH ERROR {}" .format (status ))
265
289
raise ConnectionError
266
290
else :
267
- return BS (page .html .html , 'lxml' )
291
+ bspage = BS (page .html .html , 'lxml' )
292
+ # If a request required an active session but the one we had happened to be
293
+ # stale, this magical span will be found in the page data:
294
+ span = bspage .find ('span' , attrs = {'class' : self .constants .no_access_class })
295
+ if span :
296
+ raise UnauthenticatedError
297
+ return bspage
268
298
269
299
def parsePage (self , page , itemtype :str ):
270
300
"""Parse items and ratings, returning constructed Item objects."""
0 commit comments