From 0d2bc8fca8fcf2d438f4dea9d8d9c6db4b0fc913 Mon Sep 17 00:00:00 2001
From: lbl4 <124307982+lbl4@users.noreply.github.com>
Date: Wed, 27 Sep 2023 14:16:07 +0000
Subject: [PATCH] scrapy: initial integration (#10991)

Initial integration of scrapy.

@wRAR do you agree with this?
---
 projects/scrapy/Dockerfile    | 21 ++++++++++++
 projects/scrapy/build.sh      | 22 ++++++++++++
 projects/scrapy/fuzz_crawl.py | 63 +++++++++++++++++++++++++++++++++++
 projects/scrapy/project.yaml  | 11 ++++++
 4 files changed, 117 insertions(+)
 create mode 100644 projects/scrapy/Dockerfile
 create mode 100755 projects/scrapy/build.sh
 create mode 100644 projects/scrapy/fuzz_crawl.py
 create mode 100644 projects/scrapy/project.yaml

diff --git a/projects/scrapy/Dockerfile b/projects/scrapy/Dockerfile
new file mode 100644
index 000000000000..d6d438fbe8fb
--- /dev/null
+++ b/projects/scrapy/Dockerfile
@@ -0,0 +1,21 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+FROM gcr.io/oss-fuzz-base/base-builder-python
+RUN pip3 install --upgrade pip
+RUN git clone https://github.com/scrapy/scrapy.git scrapy
+COPY *.sh *py $SRC/
+WORKDIR $SRC/scrapy
diff --git a/projects/scrapy/build.sh b/projects/scrapy/build.sh
new file mode 100755
index 000000000000..d007714373ee
--- /dev/null
+++ b/projects/scrapy/build.sh
@@ -0,0 +1,22 @@
+#!/bin/bash -eu
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+pip3 install .
+
+for fuzzer in $(find $SRC -name 'fuzz_*.py'); do
+  compile_python_fuzzer $fuzzer
+done
diff --git a/projects/scrapy/fuzz_crawl.py b/projects/scrapy/fuzz_crawl.py
new file mode 100644
index 000000000000..d40ec463df8f
--- /dev/null
+++ b/projects/scrapy/fuzz_crawl.py
@@ -0,0 +1,63 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+"""Atheris harness that starts a Scrapy crawl with a fuzzed setting value."""
+
+import sys
+import atheris
+import scrapy
+from scrapy.crawler import CrawlerProcess
+
+
+class test_spider(scrapy.Spider):
+    """Minimal spider whose callback discards every response."""
+
+    # Scrapy requires every spider to define a name.
+    name = 'test_spider'
+    start_urls = ['http://google.com', 'http://youtube.com/']
+
+    def parse(self, response):
+        """Ignore the response; there is nothing to extract."""
+
+
+def TestOneInput(data):
+    """Build a CrawlerProcess from the fuzz input and run one crawl."""
+    fdp = atheris.FuzzedDataProvider(data)
+    test = fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, 4096))
+
+    try:
+        # Settings are a mapping of names to values; {test} on its own
+        # would be a set literal rather than a dict.
+        process = CrawlerProcess(settings={'USER_AGENT': test})
+        process.crawl(test_spider)
+        # NOTE(review): the Twisted reactor cannot be restarted, so calls
+        # after the first are expected to raise here -- confirm intended.
+        process.start()
+    except Exception:
+        # Best effort: crawl failures are not findings, but do not swallow
+        # KeyboardInterrupt/SystemExit the way a bare except would.
+        pass
+
+
+def main():
+    """Instrument loaded modules and hand control to the fuzzing engine."""
+    atheris.instrument_all()
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/projects/scrapy/project.yaml b/projects/scrapy/project.yaml
new file mode 100644
index 000000000000..2deb0e8423a0
--- /dev/null
+++ b/projects/scrapy/project.yaml
@@ -0,0 +1,11 @@
+homepage: "https://scrapy.org/"
+language: python
+primary_contact: "wrar42@gmail.com"
+auto_ccs:
+  - "lbl4gojevic@gmail.com"
+fuzzing_engines:
+  - libfuzzer
+sanitizers:
+  - address
+  - undefined
+main_repo: "https://github.com/scrapy/scrapy.git"