From a4b234b7101eaa8191db8bfa27d0e7411b42128a Mon Sep 17 00:00:00 2001
From: Gregory Petukhov
Date: Sun, 13 May 2018 01:23:07 +0300
Subject: [PATCH] Fix #346: spider does not process `initial_urls`

---
 grab/spider/base.py  |  1 +
 tests/spider_task.py | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/grab/spider/base.py b/grab/spider/base.py
index 817848c7..258fd0ff 100644
--- a/grab/spider/base.py
+++ b/grab/spider/base.py
@@ -663,6 +663,7 @@ def run(self):
             self.prepare()
             if self.task_queue is None:
                 self.setup_queue()
+            self.process_initial_urls()
             services = [
                 self.task_dispatcher,
                 self.task_generator_service,
diff --git a/tests/spider_task.py b/tests/spider_task.py
index 08be3185..b93bfc37 100644
--- a/tests/spider_task.py
+++ b/tests/spider_task.py
@@ -395,3 +395,17 @@ def task_generator(self):
         bot.add_task(task)
         bot.run()
         self.assertEqual(1, bot.stat.counters['foo'])
+
+    def test_initial_urls(self):
+        url = self.server.get_url()
+
+        class TestSpider(Spider):
+            initial_urls = [url]
+
+            def task_initial(self, unused_grab, unused_task):
+                self.stat.inc('foo', 1)
+
+        bot = build_spider(TestSpider)
+        bot.run()
+
+        self.assertEqual(1, bot.stat.counters['foo'])