Skip to content

Commit

Permalink
created basic fetch and availability functions for scorecode pipeline…
Browse files Browse the repository at this point in the history
… nexB#598

Signed-off-by: 404-geek <pranayd61@gmail.com>
  • Loading branch information
404-geek committed Jul 7, 2024
1 parent bc445c1 commit c2f5c4d
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 1 deletion.
2 changes: 1 addition & 1 deletion scancodeio/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,4 +417,4 @@

# OpenSSF ScoreCard Integration

SCORECARD_URL = env.str('SCORECARD_URL', default="")
SCORECARD_URL = env.str("SCORECARD_URL", default="")
59 changes: 59 additions & 0 deletions scanpipe/pipelines/get_scorecard_info_packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/nexB/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import Pipeline
from scanpipe.pipes import scorecode


class FetchScoreCodeInfo(Pipeline):
    """
    Fetch scorecode information for the project's discovered packages.

    The pipeline first checks that the scorecode service is configured and
    available, then passes the project's discovered packages to the
    ``scorecode`` pipe.
    """

    download_inputs = False
    is_addon = True

    @classmethod
    def steps(cls):
        # Only steps that are actually defined on this class are listed:
        # referencing an undefined method here raises AttributeError as soon
        # as the steps are collected. A dependencies lookup step can be added
        # once it is implemented.
        return (
            cls.check_scorecode_service_availability,
            cls.lookup_packages_scorecode_info,
        )

    def check_scorecode_service_availability(self):
        """
        Check if the scorecode service is configured and available.

        Raises:
            Exception: If the service is not configured or not available.
        """
        if not scorecode.is_configured():
            raise Exception("scorecode service is not configured.")

        if not scorecode.is_available():
            raise Exception("scorecode service is not available.")

    def lookup_packages_scorecode_info(self):
        """Fetch scorecode information for each of the project's discovered packages."""
        packages = self.project.discoveredpackages.all()
        # The pipe module exposes this helper under the name
        # ``fetch_scorecard_info``.
        scorecode.fetch_scorecard_info(
            packages=packages,
            logger=self.log,
        )
62 changes: 62 additions & 0 deletions scanpipe/pipes/ScoreCode.py → scanpipe/pipes/scorecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@
# Visit https://github.com/nexB/scancode.io for support and download.

import logging
from collections import namedtuple
from urllib.parse import urlparse

from django.conf import settings

import requests
from ossf_scorecard.scorecard import GetScorecard

# Label string for this integration (where it is used is not visible here).
label = "ScoreCode"
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -58,3 +61,62 @@ def is_available():
return False

return response.status_code == requests.codes.ok


def fetch_scorecard_info(packages, logger):
    """
    Fetch scorecard information for the given packages.

    Packages whose ``vcs_url`` cannot be resolved to a supported repository
    by ``extract_repo_info`` are skipped. The fetched data is only logged.

    Args:
        packages (QuerySet): A queryset of package instances.
        logger: Either a logger-like object exposing an ``info()`` method, or
            a plain callable accepting a single message string.
    """
    # Accept both calling conventions: a logger object (use its ``info``
    # method) or a bare logging callable (use it directly).
    log = getattr(logger, "info", logger)

    for package in packages:
        repo_data = extract_repo_info(package.vcs_url)
        if not repo_data:
            continue

        # Announce the fetch before it happens, then report the result.
        log(f"Fetching scorecard data for package: {package}")
        scorecard_data = GetScorecard(
            platform=repo_data.platform, org=repo_data.org, repo=repo_data.repo
        )
        log(f"Scorecard data for package {package}: {scorecard_data}")


# Built once at import time rather than on every call.
RepoData = namedtuple("RepoData", ["platform", "org", "repo"])

# Supported hosting domains mapped to their platform name.
_REPO_PLATFORMS = {"github.com": "github", "gitlab.com": "gitlab"}


def extract_repo_info(url):
    """
    Extract platform, org, and repo from a given GitHub or GitLab URL.

    The host must be exactly a supported domain or one of its subdomains
    (e.g. ``www.github.com``). A trailing ``.git`` on the repository name is
    removed and empty path segments are ignored.

    Args:
        url (str): The URL to parse.

    Returns:
        RepoData: Named tuple containing 'platform', 'org', and 'repo' if the URL is
        valid, else None.
    """
    if not url:
        return None

    try:
        parsed_url = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. unbalanced IPv6 brackets).
        return None

    hostname = parsed_url.hostname
    if not hostname:
        return None

    # Exact domain or subdomain match only; a substring test would also
    # accept unrelated hosts such as "notgithub.com" or "github.com.evil.org".
    platform = None
    for domain, name in _REPO_PLATFORMS.items():
        if hostname == domain or hostname.endswith("." + domain):
            platform = name
            break
    if platform is None:
        return None

    # Drop empty segments produced by leading, trailing, or doubled slashes.
    path_parts = [part for part in parsed_url.path.split("/") if part]
    if len(path_parts) < 2:
        return None

    org, repo = path_parts[0], path_parts[1]
    # Clone URLs commonly end in ".git", which is not part of the repo name.
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    if not repo:
        return None

    return RepoData(platform=platform, org=org, repo=repo)

0 comments on commit c2f5c4d

Please sign in to comment.