Skip to content

Commit

Permalink
feat: Initialize SqliteSpiderQueue with config. Defer dbs_dir logic t…
Browse files Browse the repository at this point in the history
…o spider queue implementation. Respect :memory: and URL values for dbs_dir.
  • Loading branch information
jpmckinney committed Mar 9, 2023
1 parent da69c30 commit 040cc67
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 19 deletions.
9 changes: 7 additions & 2 deletions docs/news.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,14 @@ Unreleased
Added
~~~~~

- Webservice
- Add ``spiderqueue`` configuration option for custom spider queue.

Changed
~~~~~~~

- Add ``spiderqueue`` configuration option for custom spider queue.
- ``scrapyd.spiderqueue.SqliteSpiderQueue`` is initialized with a ``scrapyd.config.Config`` object and a project name, rather than a SQLite connection string (i.e. database file path).
- If ``dbs_dir`` is set to ``:memory:`` or to a URL, it is passed through to ``scrapyd.jobstorage.SqliteJobStorage`` and ``scrapyd.spiderqueue.SqliteSpiderQueue`` without modification and without creating a directory.
- ``scrapyd.utils.get_spider_queues`` defers the creation of the ``dbs_dir`` directory to the spider queue implementation.

1.4.1 (2023-02-10)
------------------
Expand Down
8 changes: 2 additions & 6 deletions scrapyd/jobstorage.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import os
from datetime import datetime

from zope.interface import implementer

from scrapyd.interfaces import IJobStorage
from scrapyd.sqlite import SqliteFinishedJobs
from scrapyd.utils import sqlite_connection_string


def job_log_url(job):
Expand Down Expand Up @@ -50,11 +50,7 @@ def __iter__(self):
class SqliteJobStorage(object):

def __init__(self, config):
dbsdir = config.get('dbs_dir', 'dbs')
if not os.path.exists(dbsdir):
os.makedirs(dbsdir)
dbpath = os.path.join(dbsdir, 'jobs.db')
self.jstorage = SqliteFinishedJobs(dbpath, "finished_jobs")
self.jstorage = SqliteFinishedJobs(sqlite_connection_string(config, 'jobs'), "finished_jobs")
self.finished_to_keep = config.getint('finished_to_keep', 100)

def add(self, job):
Expand Down
5 changes: 3 additions & 2 deletions scrapyd/spiderqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

from scrapyd.interfaces import ISpiderQueue
from scrapyd.sqlite import JsonSqlitePriorityQueue
from scrapyd.utils import sqlite_connection_string


@implementer(ISpiderQueue)
class SqliteSpiderQueue(object):

def __init__(self, database=None, table='spider_queue'):
self.q = JsonSqlitePriorityQueue(database, table)
def __init__(self, config, project, table='spider_queue'):
self.q = JsonSqlitePriorityQueue(sqlite_connection_string(config, project), table)

def add(self, name, priority=0.0, **spider_args):
d = spider_args.copy()
Expand Down
3 changes: 2 additions & 1 deletion scrapyd/tests/test_spiderqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from zope.interface.verify import verifyObject

from scrapyd import spiderqueue
from scrapyd.config import Config
from scrapyd.interfaces import ISpiderQueue


Expand All @@ -11,7 +12,7 @@ class SpiderQueueTest(unittest.TestCase):
"""

def setUp(self):
self.q = spiderqueue.SqliteSpiderQueue(':memory:')
self.q = spiderqueue.SqliteSpiderQueue(Config(values={'dbs_dir': ':memory:'}), 'quotesbot')
self.name = 'spider1'
self.priority = 5
self.args = {
Expand Down
19 changes: 11 additions & 8 deletions scrapyd/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import sys
from subprocess import PIPE, Popen
from urllib.parse import urlsplit

from packaging.version import InvalidVersion, Version
from scrapy.utils.misc import load_object
Expand Down Expand Up @@ -54,15 +55,17 @@ def __setitem__(self, key, value):

def get_spider_queues(config):
"""Return a dict of Spider Queues keyed by project name"""
dbsdir = config.get('dbs_dir', 'dbs')
if not os.path.exists(dbsdir):
os.makedirs(dbsdir)
spiderqueue = load_object(config.get('spiderqueue', 'scrapyd.spiderqueue.SqliteSpiderQueue'))
d = {}
for project in get_project_list(config):
dbpath = os.path.join(dbsdir, '%s.db' % project)
d[project] = spiderqueue(dbpath)
return d
return {project: spiderqueue(config, project) for project in get_project_list(config)}


def sqlite_connection_string(config, database):
    """Return the SQLite connection string for ``database``.

    The ``dbs_dir`` setting is read from ``config`` (default ``'dbs'``).

    - If ``dbs_dir`` is ``:memory:`` or a URL, it is returned unchanged and
      no directory is created.
    - Otherwise ``dbs_dir`` is treated as a directory: it is created if
      missing, and the path of ``<database>.db`` inside it is returned.

    :param config: object exposing ``get(key, default)``
    :param database: database name, used as the file name without ``.db``
    :return: the string to pass to the SQLite connection
    """
    # Imported locally so this function does not depend on the module's
    # import block being updated.
    import ntpath

    dbs_dir = config.get('dbs_dir', 'dbs')

    # ``urlsplit`` reports a one-letter scheme for Windows drive-letter paths
    # such as ``C:\dbs``. Those are directories, not URLs, so a value only
    # counts as a URL when it has a scheme and no drive component.
    has_drive = bool(ntpath.splitdrive(dbs_dir)[0])
    is_url = bool(urlsplit(dbs_dir).scheme) and not has_drive
    if dbs_dir == ':memory:' or is_url:
        return dbs_dir

    # ``exist_ok`` avoids the race between checking for the directory and
    # creating it.
    os.makedirs(dbs_dir, exist_ok=True)
    return os.path.join(dbs_dir, f'{database}.db')


def get_project_list(config):
Expand Down

0 comments on commit 040cc67

Please sign in to comment.