-
Notifications
You must be signed in to change notification settings - Fork 1
/
tidelift-lookup
executable file
·162 lines (140 loc) · 5.04 KB
/
tidelift-lookup
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/python
#
# SPDX-FileCopyrightText: 2024 Tidelift, Inc.
#
# SPDX-License-Identifier: BlueOak-1.0.0
#
# Read a series of syft outputs, and return a CSV that
# assesses them against Tidelift's API to determine
# what bad dependencies are in use that the organization
# should move away from.
import csv
import json
import os
import sys
from functools import cmp_to_key
import urllib3
# the Tidelift bulk API has a limit of 1000 entries. Pick something below that
BATCH_SIZE = 900
# Pull API key from environment
key = os.getenv("TIDELIFT_API_KEY")
if len(sys.argv) < 2:
print("Usage: tidelift-lookup <sbom> <sbom2> <sbom3>..")
sys.exit(1)
# Compare two software versions
#
# We don't use a specific python semver library because we want to
# do our best with versions that aren't technically semver.
def vercmp(a, b, major_only=False):
a_parts = a.split(".")
b_parts = b.split(".")
index = 0
while index < len(a_parts):
try:
return int(a_parts[index]) - int(b_parts[index])
except:
pass
if major_only:
return 0
index += 1
return 0
def rowcmp(a, b):
return vercmp(a[0], b[0])
sboms = {}
for filename in sys.argv[1:]:
with open(filename, "r") as sbomfile:
data = sbomfile.readlines()
app_version = filename.replace(".syft", "")
sboms[app_version] = []
for line in data:
try:
(name, version, platform) = line.split()
except: # GitHub actions, other non-platform things may not have all fields
continue
if platform in ("github-action", "TYPE"):
continue
if (
platform == "python"
): # TODO: see if more mappings for syft platforms are needed
platform = "pypi"
if (platform, name, version) not in sboms[app_version]:
sboms[app_version].append((platform, name, version))
pool = urllib3.PoolManager()
headers = urllib3.HTTPHeaderDict()
headers.add("Accept", "application/json")
headers.add("Authorization", f"bearer {key}")
output = []
for sbom_version in sboms.keys():
index = 0
pkgs = sboms[sbom_version]
good_packages = []
unassessed_packages = []
bad_packages = []
more_than_one_major_behind_packages = []
while True:
subset = pkgs[index : index + BATCH_SIZE]
rawdata = {
"packages": list(map(lambda x: {"platform": x[0], "name": x[1]}, subset))
}
# Use Tidelift's bulk package API (much faster than individual requests)
resp = pool.request(
"POST",
"https://api.tidelift.com/external-api/v1/packages/lookup",
headers=headers,
json=rawdata,
)
if resp.status != 200:
print(f"Error retrieving data from API: {resp.status}")
sys.exit(1)
for pkg in resp.json()["packages"]:
key = f"{pkg['platform']}/{pkg['name']}"
# Check the recommendation status
if pkg["tidelift_recommendation"] == "recommended":
good_packages.append(key)
else:
bad_packages.append(key)
# See if we're more than one major behind the latest recommended release
if pkg["latest_recommended_release"]["version"]:
for sbom_entry in pkgs:
if (
sbom_entry[0] == pkg["platform"]
and sbom_entry[1] == pkg["name"]
):
sbom_ver = sbom_entry[2]
rec_ver = pkg["latest_recommended_release"]["version"]
if vercmp(rec_ver, sbom_ver, major_only=True) >= 1:
more_than_one_major_behind_packages.append(key)
for pkg in resp.json()["missing_results"]:
key = f"{pkg['platform']}/{pkg['name']}"
unassessed_packages.append(key)
index += BATCH_SIZE
if index > len(pkgs):
break
output.append(
(
sbom_version,
len(good_packages),
bad_packages,
len(bad_packages),
unassessed_packages,
len(unassessed_packages),
more_than_one_major_behind_packages,
len(more_than_one_major_behind_packages),
)
)
output.sort(key=cmp_to_key(rowcmp))
with sys.stdout as csvfile:
fieldnames = (
"version", # application version
"good_count", # number of 'good' dependencies
"bad", # list of 'bad' dependencies
"bad_count", # count of 'bad' dependencies
"unassesed", # list of unassessed-by-Tidelift dependencies (internal pkgs, etc)
"unassessed_count", # count of unassessed-by-Tidelift dependencies
"behind", # list of dependences not on the latest major release stream
"behind_count", # count of dependencies not on the latest major release stream
)
writer = csv.writer(csvfile)
writer.writerow(fieldnames)
for entry in output:
writer.writerow(entry)