From 8e55156c644226101dd3fa145049404748581279 Mon Sep 17 00:00:00 2001
From: my8100
Date: Sun, 28 Apr 2019 13:15:00 +0800
Subject: [PATCH] Temp support for Scrapyd v1.3.0 (not released)

https://github.com/scrapy/scrapyd/pull/256
The Scrapyd PR improves the job listing table, adds a cancel endpoint
(part of #254), and rewrites the resource class (refactoring).
---
 scrapydweb/myview.py        | 2 +-
 scrapydweb/overview/jobs.py | 5 ++++-
 scrapydweb/utils/poll.py    | 6 +++++-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/scrapydweb/myview.py b/scrapydweb/myview.py
index e226384..5ca6ab5 100644
--- a/scrapydweb/myview.py
+++ b/scrapydweb/myview.py
@@ -241,7 +241,7 @@ def make_request(self, url, data=None, auth=None, as_json=True, dumps_json=True,
                 r = session.post(url, data=data, auth=auth, timeout=timeout)
             else:
                 r = session.get(url, auth=auth, timeout=timeout)
-            r.encoding = 'utf8'
+            r.encoding = 'utf-8'
         except Exception as err:
             # self.logger.error('!!!!! %s %s' % (err.__class__.__name__, err))
             self.logger.error("!!!!! error with %s: %s", url, err)
diff --git a/scrapydweb/overview/jobs.py b/scrapydweb/overview/jobs.py
index cf85531..585f679 100644
--- a/scrapydweb/overview/jobs.py
+++ b/scrapydweb/overview/jobs.py
@@ -25,7 +25,7 @@
 STATUS_FINISHED = '2'
 NOT_DELETED = '0'
 DELETED = '1'
-HREF_PATTERN = re.compile(r"href='(.+?)'")
+HREF_PATTERN = re.compile(r"""href=['"](.+?)['"]""")  # Temp support for Scrapyd v1.3.0 (not released)
 JOB_PATTERN = re.compile(r"""
                             <tr>
                                 <td>(?P<project>.*?)</td>
@@ -37,6 +37,7 @@
                                 (?:<td>(?P<finish>.*?)</td>)?
                                 (?:<td>(?P<href_log>.*?)</td>)?
                                 (?:<td>(?P<href_items>.*?)</td>)?
+                                [\w\W]*?  # Temp support for Scrapyd v1.3.0 (not released)
                             </tr>
                           """, re.X)
 JOB_KEYS = ['project', 'spider', 'job', 'pid', 'start', 'runtime', 'finish', 'href_log', 'href_items']
@@ -96,6 +97,8 @@ def dispatch_request(self, **kwargs):
                 tip="Click the above link to make sure your Scrapyd server is accessable. "
             )
             return render_template(self.template_fail, **kwargs)
 
+        # Temp support for Scrapyd v1.3.0 (not released)
+        self.text = re.sub(r'<thead>.*?</thead>', '', self.text, flags=re.S)
         self.jobs = [dict(zip(JOB_KEYS, job)) for job in re.findall(JOB_PATTERN, self.text)]
         self.jobs_backup = list(self.jobs)
diff --git a/scrapydweb/utils/poll.py b/scrapydweb/utils/poll.py
index c5541b2..155c487 100644
--- a/scrapydweb/utils/poll.py
+++ b/scrapydweb/utils/poll.py
@@ -35,6 +35,7 @@
                                 (?:<td>(?P<finish>.*?)</td>)?
                                 (?:<td>(?P<log>.*?)</td>)?
                                 (?:<td>(?P<items>.*?)</td>)?
+                                [\w\W]*?  # Temp support for Scrapyd v1.3.0 (not released)
                             </tr>
                           """, re.X)
 JOB_KEYS = ['project', 'spider', 'job', 'pid', 'start', 'runtime', 'finish', 'log', 'items']
@@ -105,7 +106,9 @@ def fetch_jobs(self, node, url, auth):
         assert r is not None, "[node %s] fetch_jobs failed: %s" % (node, url)
         self.logger.debug("[node %s] fetch_jobs got (%s) %s bytes",
                           node, r.status_code, len(r.content))
-        jobs = [dict(zip(JOB_KEYS, job)) for job in re.findall(JOB_PATTERN, r.text)]
+        # Temp support for Scrapyd v1.3.0 (not released)
+        text = re.sub(r'<thead>.*?</thead>', '', r.text, flags=re.S)
+        jobs = [dict(zip(JOB_KEYS, job)) for job in re.findall(JOB_PATTERN, text)]
         for job in jobs:
             job_tuple = (job['project'], job['spider'], job['job'])
             if job['pid']:
@@ -167,6 +170,7 @@ def make_request(self, url, auth, post=False):
                 r = self.session.post(url, auth=auth, timeout=self.timeout)
             else:
                 r = self.session.get(url, auth=auth, timeout=self.timeout)
+            r.encoding = 'utf-8'
             assert r.status_code == 200, "got status_code %s" % r.status_code
         except Exception as err:
             self.logger.error("make_request failed: %s\n%s", url, err)
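
Note: below is a minimal, self-contained sketch of the parsing path this patch
changes. The sample HTML is hypothetical; it only mimics the table shape the
Scrapyd PR is expected to produce (a <thead> header block plus an extra
trailing cancel cell per row). The pattern and keys are copied from the
jobs.py hunk above.

import re

# Mirrors the patched jobs.py: href values may be single- or double-quoted.
HREF_PATTERN = re.compile(r"""href=['"](.+?)['"]""")
JOB_PATTERN = re.compile(r"""
                            <tr>
                                <td>(?P<project>.*?)</td>
                                <td>(?P<spider>.*?)</td>
                                <td>(?P<job>.*?)</td>
                                (?:<td>(?P<pid>.*?)</td>)?
                                (?:<td>(?P<start>.*?)</td>)?
                                (?:<td>(?P<runtime>.*?)</td>)?
                                (?:<td>(?P<finish>.*?)</td>)?
                                (?:<td>(?P<href_log>.*?)</td>)?
                                (?:<td>(?P<href_items>.*?)</td>)?
                                [\w\W]*?  # lazily absorbs extra cells, e.g. the new cancel button
                            </tr>
                          """, re.X)
JOB_KEYS = ['project', 'spider', 'job', 'pid', 'start', 'runtime', 'finish',
            'href_log', 'href_items']

# Hypothetical v1.3.0-style jobs page: a <thead> block, then one finished job
# whose row ends with an extra cancel-button cell.
text = (
    '<table>'
    '<thead><tr><th>Project</th><th>Spider</th><th>Job</th><th>PID</th>'
    '<th>Start</th><th>Runtime</th><th>Finish</th><th>Log</th><th>Items</th>'
    '<th>Cancel</th></tr></thead>'
    '<tbody><tr><td>demo</td><td>test</td><td>0123456789abcdef</td><td></td>'
    '<td>2019-04-28 13:15:01</td><td>0:00:30</td><td>2019-04-28 13:15:31</td>'
    '<td><a href="/logs/demo/test/0123456789abcdef.log">Log</a></td>'
    '<td><a href="/items/demo/test/0123456789abcdef.jl">Items</a></td>'
    '<td><button>Cancel</button></td></tr></tbody></table>'
)

# Same preprocessing as the patch: drop the header block before matching.
text = re.sub(r'<thead>.*?</thead>', '', text, flags=re.S)
jobs = [dict(zip(JOB_KEYS, job)) for job in re.findall(JOB_PATTERN, text)]

assert jobs[0]['project'] == 'demo'
assert jobs[0]['finish'] == '2019-04-28 13:15:31'
assert HREF_PATTERN.search(jobs[0]['href_log']).group(1) == '/logs/demo/test/0123456789abcdef.log'

Without the [\w\W]*? tail, the last matched <td> group would backtrack and
swallow the new cancel cell; without the <thead> strip, a header row written
with <td> cells could be captured as a bogus job. The r.encoding hunks are
independent fixes: 'utf-8' is the canonical codec name, and setting it
explicitly in poll.py stops requests from guessing the charset before r.text
reaches the regexes.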