From 0b7cd4af9f019b009373a68c31f95a7be3db4d97 Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Fri, 20 Sep 2024 08:50:29 +0200 Subject: [PATCH 01/10] Add start_time to job response --- .github/workflows/test-docker.yml | 37 ----- .github/workflows/test-k8s.yml | 47 ------ .github/workflows/test-manifest.yml | 61 ------- .github/workflows/test.yml | 152 ++++++++++++++++++ README.md | 2 +- scrapyd_k8s/__init__.py | 0 scrapyd_k8s/launcher/docker.py | 8 +- scrapyd_k8s/launcher/k8s.py | 8 +- scrapyd_k8s/tests/__init__.py | 0 scrapyd_k8s/tests/integration/__init__.py | 0 .../tests/integration/test_api.py | 9 +- scrapyd_k8s/tests/unit/__init__.py | 0 scrapyd_k8s/tests/unit/test_utils.py | 18 +++ scrapyd_k8s/utils.py | 13 +- 14 files changed, 200 insertions(+), 155 deletions(-) delete mode 100644 .github/workflows/test-docker.yml delete mode 100644 .github/workflows/test-k8s.yml delete mode 100644 .github/workflows/test-manifest.yml create mode 100644 .github/workflows/test.yml create mode 100644 scrapyd_k8s/__init__.py create mode 100644 scrapyd_k8s/tests/__init__.py create mode 100644 scrapyd_k8s/tests/integration/__init__.py rename test_api.py => scrapyd_k8s/tests/integration/test_api.py (95%) create mode 100644 scrapyd_k8s/tests/unit/__init__.py create mode 100644 scrapyd_k8s/tests/unit/test_utils.py diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml deleted file mode 100644 index 22861c6..0000000 --- a/.github/workflows/test-docker.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: Tests on Docker -on: - push: - branches: - - main - pull_request: - -jobs: - container: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.11 - cache: 'pip' - - - name: Install dependencies - run: | - pip install -r requirements.txt - pip install -r requirements-test.txt - - - name: Pull example spider - run: docker pull 
ghcr.io/q-m/scrapyd-k8s-spider-example - - - name: Run scrapyd-k8s - run: | - cp scrapyd_k8s.sample-docker.conf scrapyd_k8s.conf - python -m scrapyd_k8s & - while ! nc -q 1 localhost 6800 Date: Tue, 5 Nov 2024 11:09:03 +0100 Subject: [PATCH 02/10] Add end time to job representation in the joblist endpoint --- .github/workflows/test.yml | 1 + README.md | 4 ++-- scrapyd_k8s/launcher/docker.py | 1 + scrapyd_k8s/launcher/k8s.py | 1 + scrapyd_k8s/tests/integration/test_api.py | 16 ++++++++++------ 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 373e1ec..545b773 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -147,6 +147,7 @@ jobs: - name: Run tests run: | + TEST_WITH_K8S=1 \ TEST_MAX_WAIT=60 \ TEST_AVAILABLE_VERSIONS=latest,`skopeo list-tags docker://ghcr.io/q-m/scrapyd-k8s-spider-example | jq -r '.Tags | map(select(. != "latest" and (startswith("sha-") | not))) | join(",")'` \ pytest -vv --color=yes scrapyd_k8s/tests/integration/ diff --git a/README.md b/README.md index cf84b54..253b144 100644 --- a/README.md +++ b/README.md @@ -149,9 +149,9 @@ curl http://localhost:6800/listjobs.json ``` ```json { - "finished":[], + "finished":[{"id":"fbjffo6tz5al5qjft0zrv0t7b0vzccxx","project":"example","spider":"static","state":"finished", "start_time":"2012-09-12 10:10:00.000000", "end_time":"2012-09-12 10:10:00.000000"}], // End time will only be available for the k8s launcher, for docker it will always be None "pending":[], - "running":[{"id":"e9b81fccbec211eeb3b109f30f136c01","project":"example","spider":"quotes","state":"running", "start_time": "2012-09-12 10:14:03.594664"}], + "running":[{"id":"e9b81fccbec211eeb3b109f30f136c01","project":"example","spider":"quotes","state":"running", "start_time":"2012-09-12 10:14:03.594664", "end_time":"None"}], "status":"ok" } ``` diff --git a/scrapyd_k8s/launcher/docker.py b/scrapyd_k8s/launcher/docker.py index d4fa09a..ed58c52 100644 
--- a/scrapyd_k8s/launcher/docker.py +++ b/scrapyd_k8s/launcher/docker.py @@ -79,6 +79,7 @@ def _parse_job(self, c): 'project': c.labels.get(self.LABEL_PROJECT), 'spider': c.labels.get(self.LABEL_SPIDER), 'start_time': format_iso_date_string(c.attrs['State']['StartedAt']) if state in ['running', 'finished'] else None, + 'end_time': None, # Not available using Docker's API. Add to the job representation to keep it the same as K8s jobs listing. } def _get_container(self, project_id, job_id): diff --git a/scrapyd_k8s/launcher/k8s.py b/scrapyd_k8s/launcher/k8s.py index b05ae98..10fe1bf 100644 --- a/scrapyd_k8s/launcher/k8s.py +++ b/scrapyd_k8s/launcher/k8s.py @@ -132,6 +132,7 @@ def _parse_job(self, job): 'project': job.metadata.labels.get(self.LABEL_PROJECT), 'spider': job.metadata.labels.get(self.LABEL_SPIDER), 'start_time': format_datetime_object(job.status.start_time) if state in ['running', 'finished'] else None, + 'end_time': format_datetime_object(job.status.completion_time) if state == 'finished' else None, } def _get_job(self, project, job_id): diff --git a/scrapyd_k8s/tests/integration/test_api.py b/scrapyd_k8s/tests/integration/test_api.py index 443444c..7e6eb66 100644 --- a/scrapyd_k8s/tests/integration/test_api.py +++ b/scrapyd_k8s/tests/integration/test_api.py @@ -13,6 +13,7 @@ RUN_SPIDER = os.getenv('TEST_RUN_SPIDER', 'static') MAX_WAIT = int(os.getenv('TEST_MAX_WAIT', '6')) STATIC_SLEEP = float(os.getenv('TEST_STATIC_SLEEP', '2')) +WITH_K8S = bool(os.getenv('TEST_WITH_K8S')) def test_root_ok(): response = requests.get(BASE_URL) @@ -158,8 +159,9 @@ def test_scenario_cancel_running_finished_ok(): # wait until the job has stopped listjobs_wait(jobid, 'finished') jobinfo = assert_listjobs(finished=jobid) - start_time = jobinfo.pop('start_time') - assert start_time and datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S.%f') + start_time, end_time = jobinfo.pop('start_time'), jobinfo.pop('end_time') + assert datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S.%f') 
+ assert datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S.%f') if WITH_K8S else end_time is None assert jobinfo == { 'id': jobid, 'project': RUN_PROJECT, 'spider': RUN_SPIDER, 'state': 'finished' } # then cancel it again, though nothing would happen response = requests.post(BASE_URL + '/cancel.json', data={ 'project': RUN_PROJECT, 'job': jobid }) @@ -179,15 +181,17 @@ def scenario_regular(schedule_args): # wait until the job is running listjobs_wait(jobid, 'running') jobinfo = assert_listjobs(running=jobid) - start_time = jobinfo.pop('start_time') - assert start_time and datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S.%f') + start_time, end_time = jobinfo.pop('start_time'), jobinfo.pop('end_time') + assert datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S.%f') + assert end_time is None assert jobinfo == { 'id': jobid, 'project': RUN_PROJECT, 'spider': RUN_SPIDER, 'state': 'running' } # wait until the job has finished listjobs_wait(jobid, 'finished') # check listjobs output jobinfo = assert_listjobs(finished=jobid) - start_time = jobinfo.pop('start_time') - assert start_time and datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S.%f') + start_time, end_time = jobinfo.pop('start_time'), jobinfo.pop('end_time') + assert datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S.%f') + assert datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S.%f') if WITH_K8S else end_time is None assert jobinfo == { 'id': jobid, 'project': RUN_PROJECT, 'spider': RUN_SPIDER, 'state': 'finished' } def assert_response_ok(response): From 43c3f5942add0260ba652082e3b41ccc0473ff9e Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Tue, 5 Nov 2024 11:36:51 +0100 Subject: [PATCH 03/10] Handle completion time empty in V1JobStatus which happens when a job is cancelled --- scrapyd_k8s/launcher/k8s.py | 2 +- scrapyd_k8s/tests/integration/test_api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapyd_k8s/launcher/k8s.py b/scrapyd_k8s/launcher/k8s.py index 10fe1bf..76910e7 100644 ---
a/scrapyd_k8s/launcher/k8s.py +++ b/scrapyd_k8s/launcher/k8s.py @@ -132,7 +132,7 @@ def _parse_job(self, job): 'project': job.metadata.labels.get(self.LABEL_PROJECT), 'spider': job.metadata.labels.get(self.LABEL_SPIDER), 'start_time': format_datetime_object(job.status.start_time) if state in ['running', 'finished'] else None, - 'end_time': format_datetime_object(job.status.completion_time) if state == 'finished' else None, + 'end_time': format_datetime_object(job.status.completion_time) if job.status.completion_time and state == 'finished' else None, } def _get_job(self, project, job_id): diff --git a/scrapyd_k8s/tests/integration/test_api.py b/scrapyd_k8s/tests/integration/test_api.py index 7e6eb66..45f51b2 100644 --- a/scrapyd_k8s/tests/integration/test_api.py +++ b/scrapyd_k8s/tests/integration/test_api.py @@ -161,7 +161,7 @@ def test_scenario_cancel_running_finished_ok(): jobinfo = assert_listjobs(finished=jobid) start_time, end_time = jobinfo.pop('start_time'), jobinfo.pop('end_time') assert datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S.%f') - assert datetime.strptime(end_time, '%Y-%m-%d %H:%M:%S.%f') if WITH_K8S else end_time is None + assert end_time is None assert jobinfo == { 'id': jobid, 'project': RUN_PROJECT, 'spider': RUN_SPIDER, 'state': 'finished' } # then cancel it again, though nothing would happen response = requests.post(BASE_URL + '/cancel.json', data={ 'project': RUN_PROJECT, 'job': jobid }) From a7c26cee7d18c510d50fa5ccf4d8299f002cf539 Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Tue, 5 Nov 2024 11:37:51 +0100 Subject: [PATCH 04/10] Update env variables to manifest test --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 545b773..65df0aa 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -106,6 +106,7 @@ jobs: - name: Run tests run: | + TEST_WITH_K8S=1 \ TEST_BASE_URL=`minikube service scrapyd-k8s --url` \ 
TEST_MAX_WAIT=60 \ TEST_AVAILABLE_VERSIONS=latest,`skopeo list-tags docker://ghcr.io/q-m/scrapyd-k8s-spider-example | jq -r '.Tags | map(select(. != "latest" and (startswith("sha-") | not))) | join(",")'` \ From 0e417e460cf1fb8a8830c7377e95f03aa96b17fb Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Tue, 5 Nov 2024 11:45:03 +0100 Subject: [PATCH 05/10] Update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 253b144..c0fa354 100644 --- a/README.md +++ b/README.md @@ -149,9 +149,9 @@ curl http://localhost:6800/listjobs.json ``` ```json { - "finished":[{"id":"fbjffo6tz5al5qjft0zrv0t7b0vzccxx","project":"example","spider":"static","state":"finished", "start_time":"2012-09-12 10:10:00.000000", "end_time":"2012-09-12 10:10:00.000000"}], // End time will only be available for the k8s launcher, for docker it will always be None + "finished":[{"id":"fbjffo6tz5al5qjft0zrv0t7b0vzccxx","project":"example","spider":"static","state":"finished", "start_time":"2012-09-12 10:10:00.000000", "end_time":"2012-09-12 10:20:00.000000"}], // End time will only be available for the k8s launcher (when the job is finished and not cancelled), for docker it will always be null "pending":[], - "running":[{"id":"e9b81fccbec211eeb3b109f30f136c01","project":"example","spider":"quotes","state":"running", "start_time":"2012-09-12 10:14:03.594664", "end_time":"None"}], + "running":[{"id":"e9b81fccbec211eeb3b109f30f136c01","project":"example","spider":"quotes","state":"running", "start_time":"2012-09-12 10:14:03.594664", "end_time":null}], "status":"ok" } ``` From 61fc6c6578611119bcef37e3b8160683ad3e4f27 Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Tue, 5 Nov 2024 13:04:07 +0100 Subject: [PATCH 06/10] Update readme --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c0fa354..f1a70ce 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ curl 
http://localhost:6800/listjobs.json ``` ```json { - "finished":[{"id":"fbjffo6tz5al5qjft0zrv0t7b0vzccxx","project":"example","spider":"static","state":"finished", "start_time":"2012-09-12 10:10:00.000000", "end_time":"2012-09-12 10:20:00.000000"}], // End time will only be available for the k8s launcher (when the job is finished and not cancelled), for docker it will always be null + "finished":[{"id":"fbjffo6tz5al5qjft0zrv0t7b0vzccxx","project":"example","spider":"static","state":"finished", "start_time":"2012-09-12 10:10:00.000000", "end_time":"2012-09-12 10:20:00.000000"}], "pending":[], "running":[{"id":"e9b81fccbec211eeb3b109f30f136c01","project":"example","spider":"quotes","state":"running", "start_time":"2012-09-12 10:14:03.594664", "end_time":null}], "status":"ok" @@ -229,6 +229,10 @@ Lists spiders from the spider image's `org.scrapy.spiders` label. Lists current jobs by looking at Docker containers or Kubernetes jobs. +#### Limitations + +* **End time**; The job's end time will be populated only for the Kubernetes (k8s) launcher, provided the job finishes successfully and is not canceled. For Docker, this value will always be null. + ### ~~`delversion.json`~~ ([➽](https://scrapyd.readthedocs.io/en/latest/api.html#delversion-json)) Not supported, by design. 
From 33a5e835471d2ee3bde49716fc27ae5d77a09d27 Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Tue, 5 Nov 2024 13:05:26 +0100 Subject: [PATCH 07/10] Update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f1a70ce..cfe7740 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ curl http://localhost:6800/listjobs.json ``` ```json { - "finished":[{"id":"fbjffo6tz5al5qjft0zrv0t7b0vzccxx","project":"example","spider":"static","state":"finished", "start_time":"2012-09-12 10:10:00.000000", "end_time":"2012-09-12 10:20:00.000000"}], + "finished":[], "pending":[], "running":[{"id":"e9b81fccbec211eeb3b109f30f136c01","project":"example","spider":"quotes","state":"running", "start_time":"2012-09-12 10:14:03.594664", "end_time":null}], "status":"ok" From dd406f59302994de61f5dd81b23c07d6709f752f Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Fri, 8 Nov 2024 08:11:08 +0100 Subject: [PATCH 08/10] Make the information about the end time in the readme terser --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index cfe7740..71a7899 100644 --- a/README.md +++ b/README.md @@ -229,9 +229,7 @@ Lists spiders from the spider image's `org.scrapy.spiders` label. Lists current jobs by looking at Docker containers or Kubernetes jobs. -#### Limitations - -* **End time**; The job's end time will be populated only for the Kubernetes (k8s) launcher, provided the job finishes successfully and is not canceled. For Docker, this value will always be null. +* **End time**: Set only for completed Kubernetes jobs; always null for Docker. 
### ~~`delversion.json`~~ ([➽](https://scrapyd.readthedocs.io/en/latest/api.html#delversion-json)) From abd98a5f9c00e786daf50fc133e71eeaf92a1f3c Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Fri, 8 Nov 2024 09:12:44 +0100 Subject: [PATCH 09/10] Update readme --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 71a7899..c41b0da 100644 --- a/README.md +++ b/README.md @@ -228,8 +228,7 @@ Lists spiders from the spider image's `org.scrapy.spiders` label. ### `listjobs.json` ([➽](https://scrapyd.readthedocs.io/en/latest/api.html#listjobs-json)) Lists current jobs by looking at Docker containers or Kubernetes jobs. - -* **End time**: Set only for completed Kubernetes jobs; always null for Docker. ++Note that `end_time` is not yet supported for Docker. ### ~~`delversion.json`~~ ([➽](https://scrapyd.readthedocs.io/en/latest/api.html#delversion-json)) From a110706584b59e36473cea45e38888af10888df2 Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Fri, 8 Nov 2024 09:24:11 +0100 Subject: [PATCH 10/10] Update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c41b0da..f53ebec 100644 --- a/README.md +++ b/README.md @@ -228,7 +228,7 @@ Lists spiders from the spider image's `org.scrapy.spiders` label. ### `listjobs.json` ([➽](https://scrapyd.readthedocs.io/en/latest/api.html#listjobs-json)) Lists current jobs by looking at Docker containers or Kubernetes jobs. -+Note that `end_time` is not yet supported for Docker. +Note that `end_time` is not yet supported for Docker. ### ~~`delversion.json`~~ ([➽](https://scrapyd.readthedocs.io/en/latest/api.html#delversion-json))