Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: refactoring #9

Merged
merged 1 commit into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,14 @@ Monitoring depGraph via Snyk API ...
```

### Pruning
If you encounter an HTTP 500 when performing `test` or `monitor` commands, then try to enable pruning.
If you encounter an HTTP 422 when performing `test` or `monitor` commands, with the accompanying error message:
`Retrying: {"error":"Failed to generate snapshot. Please contact support on support@snyk.io"}`
then try to enable pruning.

What is likely happening is that there are too many vulnerable paths for the system (>100,000), so
pruning the repeated sub-dependencies will alleviate this.

You may run with `--prune` all the time to avoid this error.
You may run with `--prune` or `--prune-all` to avoid this error.

## Currently supported package types
* maven (tested with rules_jvm_external)
Expand Down
7 changes: 4 additions & 3 deletions bazel2snyk/bazel.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def get_coordinates_from_bazel_dep(self, bazel_dep, package_source):
logger.debug(f"{re_match_string=}")

for rule in bazel_rules.findall("rule"):
logger.debug(f"processing {rule.attrib['name']=}")
# logger.debug(f"processing {rule.attrib['name']=}")
if (
re.match(
r".*/BUILD(\.bzl|\.bazel)?\:\d+\:\d+$", rule.attrib["location"]
Expand Down Expand Up @@ -133,15 +133,16 @@ def maven_bazel_dep_to_snyk_dep(self, dep_coordinates: str):
def pip_bazel_dep_to_snyk_dep(self, dep_coordinates: str):
snyk_dep = dep_coordinates
logger.debug(f"PYTHON TEST: {snyk_dep=}")
# match = re.search(r"\@.*\/\/pypi__.*\:(.*).dist\-info.*\/", dep_coordinates)
match = re.search(
r"\@.*_.*\:site-packages\/(.*).dist\-info.*\/.*", dep_coordinates
)
if not match:
match = re.search(r"\@.*\/\/pypi__.*\:(.*).dist\-info.*\/", dep_coordinates)
if match:
snyk_dep = match.group(1)
k = snyk_dep.rfind("-")
snyk_dep = snyk_dep[:k] + "@" + snyk_dep[k + 1 :]
logger.debug(f"PYTHON TEST: {snyk_dep=}")
logger.debug(f"{snyk_dep=}")

return snyk_dep

Expand Down
99 changes: 21 additions & 78 deletions bazel2snyk/cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import requests
import typer
import math
import time
import sys
import traceback
Expand Down Expand Up @@ -42,16 +41,13 @@ def __init__(
self.dep_graph = dep_graph
self._visited = []
self._visited_temp = []
self._dep_path_counts = {}
self._target_path_counts = {}
self._oss_deps_count = 0

def bazel_to_depgraph(self, parent_node_id: str, depth: int):
"""
Recursive function that will walk the bazel dep tree.
"""
logger.debug(f"{parent_node_id=},{depth=}")

# global visited_temp, bazel_xml_parser
logger.debug(f"{self._visited_temp=}")

children = self.bazel_xml_parser.get_children_from_rule(
Expand All @@ -63,9 +59,15 @@ def bazel_to_depgraph(self, parent_node_id: str, depth: int):
parent_node_id, self.bazel_xml_parser.pkg_manager_name
)

if parent_dep_snyk != parent_node_id and not parent_dep_snyk.endswith(
f"{BAZEL_TARGET_VERSION_STRING}"
):
self._oss_deps_count += 1
logger.debug(f"{self._oss_deps_count=}")

# special entry for the root node of the dep graph
if depth == 0:
self.dep_graph.set_root_node_package(f"{parent_dep_snyk}")
self.dep_graph.set_root_node_package(parent_dep_snyk)

for child in children:
child_dep_for_snyk = self.snyk_dep_from_bazel_dep(
Expand All @@ -87,23 +89,14 @@ def bazel_to_depgraph(self, parent_node_id: str, depth: int):
logger.debug(f"adding pkg {child_dep_for_snyk=}")
self.dep_graph.add_pkg(child_dep_for_snyk)

# keep track of how many times each dep is encountered
if self.bazel_xml_parser.get_node_type(child) in [BazelNodeType.DEPENDENCY]:
self.increment_dep_path_count(child_dep_for_snyk)

elif self.bazel_xml_parser.get_node_type(child) in [
BazelNodeType.INTERNAL_TARGET,
BazelNodeType.EXTERNAL_TARGET,
]:
self.increment_target_path_count(child_dep_for_snyk)

logger.debug(f"adding dep {child_dep_for_snyk=} for {parent_dep_snyk=}")
self.dep_graph.add_dep(child_dep_for_snyk, parent_dep_snyk)

self._visited_temp.append(parent_node_id)

# if we've already processed this subtree, then just return
if child not in self._visited:
logger.debug(f"{child} not yet visited, traversing...")
self.bazel_to_depgraph(child, depth=depth + 1)
# else:
# future use for smarter pruning
Expand Down Expand Up @@ -137,62 +130,6 @@ def snyk_dep_from_bazel_dep(
else:
return f"{bazel_dep_id}@{BAZEL_TARGET_VERSION_STRING}"

def increment_dep_path_count(self, dep: str) -> None:
    """
    Record one more occurrence of a dependency in the walked graph.

    The running totals live in ``self._dep_path_counts`` (dependency id ->
    number of times it was encountered) and are consulted later when the
    dep graph needs to be pruned.

    Args:
        dep: identifier of the dependency; used as the dictionary key.
    """
    # A dependency seen for the first time starts from an implicit count of 0.
    self._dep_path_counts[dep] = self._dep_path_counts.get(dep, 0) + 1

def increment_target_path_count(self, dep: str) -> None:
    """
    Record one more occurrence of a bazel target in the walked graph.

    The running totals live in ``self._target_path_counts`` (target id ->
    number of times it was encountered) and are consulted later when the
    dep graph needs to be pruned.

    Args:
        dep: identifier of the target; used as the dictionary key.
    """
    # A target seen for the first time starts from an implicit count of 0.
    self._target_path_counts[dep] = self._target_path_counts.get(dep, 0) + 1

def prune_graph_all(self):
    """
    Prune graph whenever OSS dependencies are repeated more than 2x
    or when bazel target dependencies are repeated more than 10x.

    Reads the counters through the same private attributes that the
    increment helpers write (``_dep_path_counts`` / ``_target_path_counts``)
    and calls ``self.dep_graph.prune_dep`` for every entry over its limit.
    """
    # (counter, repeat limit) pairs; dependencies are handled before targets
    # so the pruning order matches the original two loops.
    limits = (
        (self._dep_path_counts, 2),
        (self._target_path_counts, 10),
    )
    for path_counts, max_instances in limits:
        for dep, instances in path_counts.items():
            if instances > max_instances:
                logger.info(f"pruning {dep} ({instances=})")
                self.dep_graph.prune_dep(dep)

def prune_graph(
    self, instance_count_threshold: int, instance_percentage_threshold: int
) -> None:
    """
    Prune graph according to threshold of duplicated transitive dependencies.

    An entry that occurs more than once is pruned when either its raw
    occurrence count, or its share of all recorded occurrences (rounded up
    to a whole percent), exceeds the corresponding threshold.

    Args:
        instance_count_threshold: prune when an entry occurs more often
            than this.
        instance_percentage_threshold: prune when an entry's percentage of
            all occurrences is greater than this.
    """
    # Merge into a fresh dict: updating self._dep_path_counts in place would
    # permanently mix target counts into the dependency counter. On a key
    # present in both, the target count wins, exactly as dict.update did.
    combined_path_counts = {**self._dep_path_counts, **self._target_path_counts}

    total_item_count = sum(combined_path_counts.values())
    logger.debug(f"{total_item_count=}")

    for dep, instances in combined_path_counts.items():
        # Entries seen only once are never pruned; this guard also ensures
        # total_item_count is non-zero before dividing.
        if instances <= 1:
            continue

        instance_percentage = math.ceil((instances / total_item_count) * 100)
        if (
            instances > instance_count_threshold
            or instance_percentage > instance_percentage_threshold
        ):
            logger.info(
                f"pruning {dep} ({instances=}/{instance_count_threshold},{instance_percentage=}/{instance_percentage_threshold})"
            )
            self.dep_graph.prune_dep(dep)


def load_file(file_path: str) -> str:
"""
Expand Down Expand Up @@ -284,7 +221,7 @@ def main(

bazel2snyk.bazel_to_depgraph(parent_node_id=bazel_target, depth=0)

if len(bazel2snyk.dep_graph.graph()["depGraph"]["graph"]["nodes"]) <= 1:
if len(bazel2snyk.dep_graph.graph().depGraph.graph.nodes) <= 1:
logger.error(
f"No {package_source} dependencies found for given target, please verify --bazel-target exists in the source data"
)
Expand All @@ -293,11 +230,13 @@ def main(
if prune_all:
logger.info("Pruning graph ...")
time.sleep(2)
bazel2snyk.prune_graph_all()
# bazel2snyk.prune_graph_all()
bazel2snyk.dep_graph.prune_graph_all()
elif prune:
time.sleep(2)
logger.info("Smart pruning graph (experimental) ...")
bazel2snyk.prune_graph(20, 5)
# bazel2snyk.prune_graph(20, 5)
bazel2snyk.dep_graph.prune_graph(20, 5)
return


Expand All @@ -306,7 +245,9 @@ def print_graph():
"""
Print the Snyk depGraph representation of the dependency graph
"""
print(f"{json.dumps(bazel2snyk.dep_graph.graph(), indent=4)}")
# print(f"{json.dumps(bazel2snyk.dep_graph.graph(), indent=4)}")
# print({bazel2snyk.dep_graph.graph().model_dump_json(indent=4)})
print(json.dumps(bazel2snyk.dep_graph.graph().model_dump(), indent=4))


@cli.command()
Expand All @@ -329,7 +270,8 @@ def test(

typer.echo("Testing depGraph via Snyk API ...", file=sys.stderr)
response: requests.Response = snyk_client.post(
f"{DEPGRAPH_BASE_TEST_URL}{snyk_org_id}", body=bazel2snyk.dep_graph.graph()
f"{DEPGRAPH_BASE_TEST_URL}{snyk_org_id}",
body=bazel2snyk.dep_graph.graph().model_dump(),
)

json_response = response.json()
Expand Down Expand Up @@ -372,7 +314,8 @@ def monitor(

typer.echo("Monitoring depGraph via Snyk API ...", file=sys.stderr)
response: requests.Response = snyk_client.post(
f"{DEPGRAPH_BASE_MONITOR_URL}{snyk_org_id}", body=bazel2snyk.dep_graph.graph()
f"{DEPGRAPH_BASE_MONITOR_URL}{snyk_org_id}",
body=bazel2snyk.dep_graph.graph().model_dump(),
)

json_response = response.json()
Expand Down
Loading
Loading