Skip to content

Commit

Permalink
Merge pull request #11 from funnelfiasco/add_bens_example_scripts
Browse files Browse the repository at this point in the history
Add my example scripts
  • Loading branch information
mihaimaruseac authored Sep 22, 2024
2 parents b186a81 + 67060c2 commit 6697dff
Show file tree
Hide file tree
Showing 10 changed files with 388 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
These folders contain some sample data for ingestion into GUAC

- docs: proof-of-concept dataset containing variety of documents produced by `dl_docs.py`
- scripts: demo scripts to showcase GUAC's capabilities
- some-sboms: tiny dataset of a handful of `gcr.io` containers
- top-dh-sboms: dataset of top 100+ dockerhub containers
11 changes: 11 additions & 0 deletions scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# guac-scripts

A set of scripts to do interesting things with [GUAC](https://guac.sh).

These scripts are community-contributed and presented as-is.
They are intended to demonstrate GUAC's capabilities and do not receive active development.

## Scripts

* [guactober](guactober/) — A script to search for projects participating in [Hacktoberfest](https://hacktoberfest.com)
* [license_check](license_check/) — A script to look for mismatches between declared and discovered licenses
32 changes: 32 additions & 0 deletions scripts/guactober/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# guactober

A script to search your [GUAC](https://guac.sh) data for projects participating in [Hacktoberfest](https://hacktoberfest.com).

## Requirements

* Python modules
* json
* re
* PyGithub
* python-gitlab
* gql
* (optional) A GitHub token (either a PAT or classic token) in `./.github_token`
* (optional) A GitLab token in `./.gitlab_token`

**Note:** If you don't use the token files, you run the risk of getting rate-limited in your queries of GitHub and GitLab.

## Usage

After installing any missing requirements, run `python3 ./guactober.py`

The script assumes your query is in `./query.gql` and that your GraphQL query endpoint is `http://localhost:8080/query`.

The table below describes settings you may want to change.
All the settings described appear near the top of the script.

| Setting | Description
| ------- | -----------
| GITHUB_TOKEN_FILE | The path on disk to a file containing your GitHub token (and only your GitHub token)
| GITLAB_TOKEN_FILE | The path on disk to a file containing your GitLab token (and only your GitLab token)
| GRAPHQL_SERVER | The full URL to your GUAC GraphQL server's query endpoint

The script will print a list of repositories that are listed as participating in Hacktoberfest.
136 changes: 136 additions & 0 deletions scripts/guactober/guactober.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# SPDX-License-Identifier: copyleft-next-0.3.1

import re
import os.path
from github import Github
from gitlab import Gitlab
from gql import Client, gql
from gql.transport.requests import RequestsHTTPTransport

###
#
# Things you might need to change
#
###

# Full URL of the query endpoint of your GUAC GraphQL server
GRAPHQL_SERVER = "http://localhost:8080/query"

# Path to a file holding a PAT or classic token for GitHub authentication.
# When this file does not exist, an unauthenticated session is used,
# which probably means you'll get rate limited.
GITHUB_TOKEN_FILE = '.github_token'

# Path to a file holding a PAT for GitLab authentication.
# When this file does not exist, an unauthenticated session is used,
# which may mean you'll get rate limited.
GITLAB_TOKEN_FILE = '.gitlab_token'

###
#
# Things you probably won't need to change
#
###

def queryGithub():
    '''
    Search for GitHub repos with the "hacktoberfest" topic
    Inputs: none
    Outputs: gh_participants (list of "github.com/<owner>/<repo>" strings)
    '''
    # Test for a GitHub token file and setup the GitHub session
    if os.path.exists(GITHUB_TOKEN_FILE):
        # The with block closes the file; no explicit close() is needed
        with open(GITHUB_TOKEN_FILE) as gh_token_file:
            github_token = gh_token_file.read().strip()
        github_session = Github(github_token)
    else:
        github_session = Github()
        print("Using unauthenticated session for GitHub, "
              "you may get rate limited!")

    print("Getting list of Hacktoberfest repos from GitHub (be patient!)")

    # NOTE: GitHub's search API caps a single search at 1,000 results, so
    # this list may not contain every participating repository.
    response = github_session.search_repositories(query='topic:hacktoberfest')

    # Prefix with the forge host so entries compare against GUAC sources
    gh_participants = ["github.com/" + repo.full_name for repo in response]

    return gh_participants

def queryGitlab():
    '''
    Search for GitLab projects with the "hacktoberfest" topic
    Inputs: none
    Outputs: gl_participants (list of "gitlab.com/<namespace>/<project>" strings)
    '''
    # Test for a GitLab token file and setup the GitLab session
    if os.path.exists(GITLAB_TOKEN_FILE):
        # The with block closes the file; no explicit close() is needed
        with open(GITLAB_TOKEN_FILE) as gl_token_file:
            gitlab_token = gl_token_file.read().strip()
        gitlab_session = Gitlab(private_token=gitlab_token)
    else:
        gitlab_session = Gitlab()
        print("Using unauthenticated session for GitLab, "
              "you may get rate limited!")

    print("Getting list of Hacktoberfest repos from GitLab (be patient!)")

    # get_all=True asks python-gitlab to follow pagination to the end
    response = gitlab_session.projects.list(get_all=True, topic="hacktoberfest")

    # Prefix with the forge host so entries compare against GUAC sources
    gl_participants = ["gitlab.com/" + repo.path_with_namespace for repo in response]

    return gl_participants

def queryGuac():
    '''
    Search the data in GUAC and return anything with HasSrcAt
    Inputs: none
    Outputs: sources (list of "<namespace>/<name>" strings)
    '''
    sources = []
    print("Searching your GUAC data")
    transport = RequestsHTTPTransport(url=GRAPHQL_SERVER)
    gql_client = Client(transport=transport, fetch_schema_from_transport=True)

    # The query text lives next to the script; the with block closes the file
    with open('query.gql') as query_file:
        gql_query = gql(query_file.read())

    guac_data = gql_client.execute(gql_query)

    # Walk every namespace and name of each source rather than only the
    # first, so no entry is dropped and empty lists do not raise IndexError
    for source_entry in guac_data['HasSourceAt']:
        for namespace_entry in source_entry['source']['namespaces']:
            for name_entry in namespace_entry['names']:
                sources.append(
                    namespace_entry['namespace'] + '/' + name_entry['name']
                )

    return sources

def findProjects(sources, participants):
    '''
    Search the participants from GitHub and GitLab in our GUAC data
    and print every GUAC source that is a participant
    Inputs: sources (list), participants (list)
    Outputs: none
    '''
    # Build the lookup set once: membership tests on a list are O(n) each,
    # and the participant list can be large
    participating = set(participants)

    # Only sources hosted on the two searched forges can match; order and
    # duplicates of `sources` are preserved in the report
    hacktoberfest_deps = [
        repo for repo in sources
        if repo.startswith(('github.com', 'gitlab.com'))
        and repo in participating
    ]

    print("Here are the Hacktoberfest projects in your GUAC data:")
    for dep in hacktoberfest_deps:
        print(dep)

# Collect the source repositories recorded in GUAC
sources = queryGuac()

# Ask each forge for its participating projects, GitHub first, then GitLab
participants = [*queryGithub(), *queryGitlab()]

# Print the GUAC sources that appear among the participants
findProjects(sources, participants)
10 changes: 10 additions & 0 deletions scripts/guactober/query.gql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Fetch type, namespace and name of every source that has a HasSourceAt record
{
  HasSourceAt(hasSourceAtSpec: {}) {
    source {
      type
      namespaces {
        namespace
        names {
          name
        }
      }
    }
  }
}
3 changes: 3 additions & 0 deletions scripts/guactober/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
python-gitlab==4.10.0
PyGithub==2.4.0
gql==3.5.0
29 changes: 29 additions & 0 deletions scripts/license_check/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# license_check

A script to search your [GUAC](https://guac.sh) data for projects that have a mismatch between declared and detected licenses.

## Requirements

* Python modules
* csv
* gql
* sys

## Usage

After installing any missing requirements, run `python3 ./license_check.py`

The script assumes your query is in `./query.gql` and that your GraphQL query endpoint is `http://localhost:8080/query`.

The table below describes settings you may want to change.
All the settings described appear near the top of the script.

| Setting | Description
| ------- | -----------
| GRAPHQL_SERVER | The full URL to your GUAC GraphQL server's query endpoint

The script will print packages or sources with mismatched licenses along with the declared license and the discovered license.

To write a CSV file instead, provide the file name as an argument.
For example: `python3 ./license_check.py license_mismatch.csv`
This results in a CSV file with three columns: package/source, declared license, discovered license.
80 changes: 80 additions & 0 deletions scripts/license_check/license_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# SPDX-License-Identifier: copyleft-next-0.3.1

import csv
import sys
from gql import Client, gql
from gql.transport.requests import RequestsHTTPTransport

###
#
# Things you might need to change
#
###

# Your GUAC GraphQL server: the full URL of its query endpoint,
# passed to the HTTP transport when querying GUAC
GRAPHQL_SERVER = "http://localhost:8080/query"

###
#
# Things you probably won't need to change
#
###

def queryGuac():
    '''
    Search the data in GUAC and return anything with CertifyLegal
    that carries both a declared and a discovered license
    Inputs: none
    Outputs: licenseData (dict mapping a package/source name to a
             [declaredLicense, discoveredLicense] list)
    '''
    print("Searching your GUAC data")
    client = Client(
        transport=RequestsHTTPTransport(url=GRAPHQL_SERVER),
        fetch_schema_from_transport=True,
    )

    # The query text lives next to the script
    with open('query.gql') as query_file:
        query_text = query_file.read()
    guac_data = client.execute(gql(query_text))

    licenseData = {}
    for legal in guac_data['CertifyLegal']:
        # Only the first namespace and first name of the subject are used
        first_namespace = legal['subject']['namespaces'][0]
        prefix = first_namespace['namespace']
        name = first_namespace['names'][0]['name']
        # An empty namespace means the bare name is the identifier
        package = prefix + "/" + name if prefix else name

        declared = legal['declaredLicense']
        discovered = legal['discoveredLicense']
        # Entries missing either license cannot be compared, so skip them
        if not (declared and discovered):
            continue
        licenseData[package] = [declared, discovered]

    return licenseData

def checkLicenses(licenseData):
    '''
    Report every entry whose declared and discovered licenses differ
    Inputs: licenseData (dict mapping a package/source name to a
            [declaredLicense, discoveredLicense] list)
    Outputs: none

    If a file name is given as the first command-line argument, the
    mismatches are written there as CSV with a header row; otherwise
    they are printed to STDOUT.
    '''
    # Collect the mismatches once so both output modes share the same data
    mismatches = [
        (entry, licenses[0], licenses[1])
        for entry, licenses in licenseData.items()
        if licenses[0] != licenses[1]
    ]

    if sys.argv[1:]:
        # Oh, you want me to write to a file? Okay!
        # The with block guarantees the file is flushed and closed;
        # newline='' is what the csv module expects for its output files
        with open(sys.argv[1], 'w', newline='') as outfile:
            csvfile = csv.writer(outfile)
            csvfile.writerow(['Package', 'Declared', 'Discovered'])
            csvfile.writerows(mismatches)
        return

    # Okay, just print to STDOUT
    for entry, declaredLicense, discoveredLicense in mismatches:
        print(entry)
        print("\tDeclares: " + declaredLicense)
        print("\tDiscovered: " + discoveredLicense)

# Script entry point: fetch the license data from GUAC, then report
# every entry whose declared and discovered licenses differ
licenseData = queryGuac()
checkLicenses(licenseData)
Loading

0 comments on commit 6697dff

Please sign in to comment.