Skip to content

Commit

Permalink
Output call tree analysis as CSV for use in graph database
Browse files Browse the repository at this point in the history
* Methods and virtual methods are represented with graph nodes.
* Direct, virtual and overridden-by relationships have been mapped.
* Bytecode indexes are part of the relationships.
* A method can interact with others multiple times,
with each bytecode index indicating where the call originates
within the origin method.
  • Loading branch information
galderz committed Mar 1, 2021
1 parent 4ec9f16 commit 140920a
Showing 1 changed file with 278 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,24 +36,36 @@
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.oracle.graal.pointsto.BigBang;
import com.oracle.graal.pointsto.flow.InvokeTypeFlow;
import com.oracle.graal.pointsto.meta.AnalysisMethod;

import jdk.vm.ci.code.BytecodePosition;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.ResolvedJavaMethod;
import jdk.vm.ci.meta.ResolvedJavaType;

public final class CallTreePrinter {

/**
 * Regular expression whose matches are: a letter at a word boundary, any upper-case letter, or a
 * literal dot. {@code display} concatenates the successive matches found in a type name to build
 * an abbreviated form of that name.
 */
public static final Pattern CAMEL_CASE_PATTERN = Pattern.compile(
"\\b[a-zA-Z]|[A-Z]|\\.");

public static void print(BigBang bigbang, String path, String reportName) {
CallTreePrinter printer = new CallTreePrinter(bigbang);
printer.buildCallTree();
Expand All @@ -66,6 +78,8 @@ public static void print(BigBang bigbang, String path, String reportName) {
writer -> printer.printClasses(writer, false));
ReportUtils.report("list of used packages", path + File.separatorChar + "reports", "used_packages_" + reportName, "txt",
writer -> printer.printClasses(writer, true));

printCsvFiles(printer.methodToNode, path, reportName);
}

interface Node {
Expand Down Expand Up @@ -300,4 +314,268 @@ private static String packagePrefix(String name) {
}
return name.substring(0, lastDot);
}

/**
 * Walks the call tree from every entry point and writes the collected nodes and edges as a set
 * of CSV reports under {@code <path>/reports}.
 *
 * @param methodToNode all method nodes of the call tree, keyed by analysis method
 * @param path base output directory; reports go into its {@code reports} sub-directory
 * @param reportName suffix appended to each report file name
 */
private static void printCsvFiles(Map<AnalysisMethod, MethodNode> methodToNode, String path, String reportName) {
    // Virtual nodes are numbered starting at the next available method id.
    final AtomicInteger virtualNodeId = new AtomicInteger(MethodNode.methodId);

    final Set<Integer> entryPointIds = new HashSet<>();
    final Set<MethodNode> nonVirtualNodes = new HashSet<>();
    final Map<List<String>, Integer> virtualNodes = new HashMap<>();

    final Map<Integer, Set<BciEndEdge>> directEdges = new HashMap<>();
    final Map<Integer, Set<BciEndEdge>> virtualEdges = new HashMap<>();
    final Map<Integer, Set<Integer>> overridenByEdges = new HashMap<>();

    // Only entry points are used as roots; everything else is reached through their invokes.
    for (MethodNode node : methodToNode.values()) {
        if (!node.isEntryPoint) {
            continue;
        }
        entryPointIds.add(node.id);
        walkNodes(node, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
    }

    final String reportsDir = path + File.separatorChar + "reports";

    ReportUtils.report("call tree for vm entry point", reportsDir, "csv_call_tree_vm_" + reportName, "csv",
                    CallTreePrinter::printVMEntryPoint);

    ReportUtils.report("call tree for methods", reportsDir, "csv_call_tree_methods_" + reportName, "csv",
                    writer -> printMethodNodes(methodToNode.values(), writer));

    ReportUtils.report("call tree for virtual methods", reportsDir, "csv_call_tree_virtual_methods_" + reportName, "csv",
                    writer -> printVirtualNodes(virtualNodes, writer));

    ReportUtils.report("call tree for entry points", reportsDir, "csv_call_tree_entry_points_" + reportName, "csv",
                    writer -> printEntryPointIds(entryPointIds, writer));

    ReportUtils.report("call tree for direct edges", reportsDir, "csv_call_tree_direct_edges_" + reportName, "csv",
                    writer -> printBciEdges(directEdges, writer));

    ReportUtils.report("call tree for overriden by edges", reportsDir, "csv_call_tree_override_by_edges_" + reportName, "csv",
                    writer -> printNonBciEdges(overridenByEdges, writer));

    ReportUtils.report("call tree for virtual edges", reportsDir, "csv_call_tree_virtual_edges_" + reportName, "csv",
                    writer -> printBciEdges(virtualEdges, writer));
}

/**
 * Writes the CSV describing the single synthetic "VM" node: a header row followed by one row
 * with id {@code 0}.
 *
 * @param writer destination of the CSV rows
 */
private static void printVMEntryPoint(PrintWriter writer) {
    final String header = convertToCSV("Id", "Name");
    final String vmRow = convertToCSV("0", "VM");
    writer.println(header);
    writer.println(vmRow);
}

/**
 * Writes one CSV row per method node, preceded by a header row.
 *
 * @param methods the method nodes to print, in the collection's iteration order
 * @param writer destination of the CSV rows
 */
private static void printMethodNodes(Collection<MethodNode> methods, PrintWriter writer) {
    writer.println(convertToCSV("Id", "Name", "Type", "Parameters", "Return", "Display"));
    for (MethodNode method : methods) {
        final List<String> columns = methodNodeInfo(method);
        writer.println(convertToCSV(columns));
    }
}

/**
 * Builds the CSV column values for a method node by delegating to {@code resolvedJavaMethodInfo}
 * with the node's id and its method.
 *
 * @param method the method node to describe
 * @return the column values produced by {@code resolvedJavaMethodInfo}
 */
private static List<String> methodNodeInfo(MethodNode method) {
return resolvedJavaMethodInfo(method.id, method.method);
}

/**
 * Recursively visits the invokes of {@code methodNode} and records nodes and edges.
 * <p>
 * For a direct invoke only the first callee is considered and a direct edge is recorded. For any
 * other invoke a virtual node is looked up or created for the target method, a virtual edge is
 * recorded from the caller to it, and an overriden-by edge is recorded from the virtual node to
 * each callee. Recursion only descends into callees that are {@code MethodNode} instances.
 *
 * @param methodNode the caller whose invokes are walked
 * @param directEdges caller id to direct-call edges, updated in place
 * @param virtualEdges caller id to virtual-call edges, updated in place
 * @param overridenByEdges virtual node id to callee ids, updated in place
 * @param virtualNodes virtual method info to virtual node id, updated in place
 * @param nonVirtualNodes method nodes reached as callees, updated in place
 * @param virtualNodeId source of ids for newly created virtual nodes
 */
private static void walkNodes(MethodNode methodNode, Map<Integer, Set<BciEndEdge>> directEdges, Map<Integer, Set<BciEndEdge>> virtualEdges, Map<Integer, Set<Integer>> overridenByEdges,
                Map<List<String>, Integer> virtualNodes, Set<MethodNode> nonVirtualNodes, AtomicInteger virtualNodeId) {
    for (InvokeNode invoke : methodNode.invokes) {
        if (!invoke.isDirectInvoke) {
            final int virtualId = addVirtualNode(invoke, virtualNodes, virtualNodeId);
            addVirtualMethodEdge(methodNode.id, invoke, virtualId, virtualEdges);
            for (Node callee : invoke.callees) {
                addOverridenByEdge(virtualId, callee, overridenByEdges, nonVirtualNodes);
                if (callee instanceof MethodNode) {
                    walkNodes((MethodNode) callee, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
                }
            }
            continue;
        }

        if (invoke.callees.isEmpty()) {
            // Direct invoke without a resolved callee: nothing to record.
            continue;
        }

        final Node callee = invoke.callees.get(0);
        addDirectEdge(methodNode.id, invoke, callee, directEdges, nonVirtualNodes);
        if (callee instanceof MethodNode) {
            walkNodes((MethodNode) callee, directEdges, virtualEdges, overridenByEdges, virtualNodes, nonVirtualNodes, virtualNodeId);
        }
    }
}

/**
 * Records a direct-call edge from {@code nodeId} to the method behind {@code calleeNode}.
 *
 * @param nodeId id of the calling method
 * @param invoke the invoke, used for its bytecode indexes
 * @param calleeNode the callee, either a {@code MethodNode} or a {@code MethodNodeReference}
 * @param edges caller id to outgoing edges, updated in place
 * @param nodes set that receives the resolved callee method node
 */
private static void addDirectEdge(int nodeId, InvokeNode invoke, Node calleeNode, Map<Integer, Set<BciEndEdge>> edges, Set<MethodNode> nodes) {
    final Set<BciEndEdge> outgoing = edges.computeIfAbsent(nodeId, k -> new HashSet<>());

    // A reference node stands in for a method node; unwrap it to reach the real node.
    final MethodNode target;
    if (calleeNode instanceof MethodNode) {
        target = (MethodNode) calleeNode;
    } else {
        target = ((MethodNodeReference) calleeNode).methodNode;
    }

    nodes.add(target);
    outgoing.add(new BciEndEdge(target.id, bytecodeIndexes(invoke)));
}

/**
 * Collects the bytecode index ({@code bci}) read from the source references of an invoke.
 *
 * @param node the invoke whose {@code sourceReferences} are read
 * @return the collected {@code bci} values as a list
 */
private static List<Integer> bytecodeIndexes(InvokeNode node) {
// NOTE(review): presumably sourceReferences is an array, so Stream.of yields one element per
// reference in array order - confirm against the InvokeNode declaration.
return Stream.of(node.sourceReferences)
.map(source -> source.bci)
.collect(Collectors.toList());
}

/**
 * Returns the id of the virtual node for the invoke's target method, allocating a new id from
 * {@code virtualNodeId} the first time that method info is seen.
 *
 * @param node the invoke whose target method identifies the virtual node
 * @param virtualNodes virtual method info to id, updated in place
 * @param virtualNodeId source of ids for new virtual nodes
 * @return the existing or newly assigned virtual node id
 */
private static int addVirtualNode(InvokeNode node, Map<List<String>, Integer> virtualNodes, AtomicInteger virtualNodeId) {
    final List<String> key = virtualMethodInfo(node.targetMethod);
    Integer id = virtualNodes.get(key);
    if (id == null) {
        id = virtualNodeId.getAndIncrement();
        virtualNodes.put(key, id);
    }
    return id;
}

/**
 * Records an edge from a calling method to a virtual node, tagged with the invoke's bytecode
 * indexes.
 *
 * @param startId id of the calling method
 * @param invoke the invoke, used for its bytecode indexes
 * @param endId id of the virtual node
 * @param edges caller id to outgoing edges, updated in place
 */
private static void addVirtualMethodEdge(int startId, InvokeNode invoke, int endId, Map<Integer, Set<BciEndEdge>> edges) {
    edges.computeIfAbsent(startId, k -> new HashSet<>())
                    .add(new BciEndEdge(endId, bytecodeIndexes(invoke)));
}

/**
 * Writes one CSV row per virtual node, preceded by a header row.
 *
 * @param virtualNodes virtual method info mapped to the assigned node id
 * @param writer destination of the CSV rows
 */
private static void printVirtualNodes(Map<List<String>, Integer> virtualNodes, PrintWriter writer) {
    writer.println(convertToCSV("Id", "Name", "Type", "Parameters", "Return", "Display"));
    for (Map.Entry<List<String>, Integer> entry : virtualNodes.entrySet()) {
        final List<String> columns = virtualMethodAndIdInfo(entry);
        writer.println(convertToCSV(columns));
    }
}

/**
 * Builds the CSV columns of a virtual node: the entry's id as first column, followed by every
 * element of the method info except its first one (the id placeholder).
 *
 * @param entry method info (key) and assigned virtual node id (value)
 * @return a new list starting with the id, then the method info elements from index 1 onwards
 */
private static List<String> virtualMethodAndIdInfo(Map.Entry<List<String>, Integer> entry) {
    final List<String> info = entry.getKey();
    final List<String> columns = new ArrayList<>(info.size() + 1);
    columns.add(String.valueOf(entry.getValue()));
    // Skip the first element of the key: its slot is taken by the real id added above.
    info.stream().skip(1).forEach(columns::add);
    return columns;
}

/**
 * Writes the ids of all entry point methods, one per row, preceded by a header row.
 *
 * @param entryPoints ids of the entry point method nodes
 * @param writer destination of the CSV rows
 */
private static void printEntryPointIds(Set<Integer> entryPoints, PrintWriter writer) {
    writer.println(convertToCSV("Id"));
    for (Integer entryPointId : entryPoints) {
        writer.println(entryPointId);
    }
}

/**
 * Records an overriden-by edge from a virtual node to the method behind {@code calleeNode}.
 *
 * @param nodeId id of the virtual node
 * @param calleeNode the callee, either a {@code MethodNode} or a {@code MethodNodeReference}
 * @param edges virtual node id to callee ids, updated in place
 * @param nodes set that receives the resolved callee method node
 */
private static void addOverridenByEdge(int nodeId, Node calleeNode, Map<Integer, Set<Integer>> edges, Set<MethodNode> nodes) {
    final Set<Integer> targets = edges.computeIfAbsent(nodeId, k -> new HashSet<>());

    // A reference node stands in for a method node; unwrap it to reach the real node.
    final MethodNode callee;
    if (calleeNode instanceof MethodNode) {
        callee = (MethodNode) calleeNode;
    } else {
        callee = ((MethodNodeReference) calleeNode).methodNode;
    }

    nodes.add(callee);
    targets.add(callee.id);
}

/**
 * Writes one CSV row per edge ({@code StartId, EndId, BytecodeIndexes}), preceded by a header
 * row.
 * <p>
 * Rows are streamed straight from the map. Collecting them into a {@code Set<BciEdge>} first
 * would not remove anything, because {@code BciEdge} does not define {@code equals} or
 * {@code hashCode}; it would only make the row order depend on identity hash codes, which differ
 * between runs.
 *
 * @param edges start node id mapped to its outgoing edges
 * @param writer destination of the CSV rows
 */
private static void printBciEdges(Map<Integer, Set<BciEndEdge>> edges, PrintWriter writer) {
    writer.println(convertToCSV("StartId", "EndId", "BytecodeIndexes"));
    edges.entrySet().stream()
                    .flatMap(entry -> entry.getValue().stream().map(endEdge -> new BciEdge(entry.getKey(), endEdge)))
                    .map(edge -> convertToCSV(String.valueOf(edge.startId), String.valueOf(edge.endEdge.id), showBytecodeIndexes(edge.endEdge.bytecodeIndexes)))
                    .forEach(writer::println);
}

/**
 * Renders bytecode indexes as a single string with {@code "->"} between consecutive values.
 *
 * @param bytecodeIndexes the indexes to render, in order
 * @return the joined text, or an empty string for an empty list
 */
private static String showBytecodeIndexes(List<Integer> bytecodeIndexes) {
    final StringBuilder text = new StringBuilder();
    boolean needsSeparator = false;
    for (Integer bci : bytecodeIndexes) {
        if (needsSeparator) {
            text.append("->");
        }
        text.append(String.valueOf(bci));
        needsSeparator = true;
    }
    return text.toString();
}

/**
 * Writes one CSV row per edge ({@code StartId, EndId}), preceded by a header row.
 * <p>
 * Rows are streamed straight from the map. Collecting them into a {@code Set<NonBciEdge>} first
 * would not remove anything, because {@code NonBciEdge} does not define {@code equals} or
 * {@code hashCode}; it would only make the row order depend on identity hash codes, which differ
 * between runs.
 *
 * @param edges start node id mapped to the ids of its end nodes
 * @param writer destination of the CSV rows
 */
private static void printNonBciEdges(Map<Integer, Set<Integer>> edges, PrintWriter writer) {
    writer.println(convertToCSV("StartId", "EndId"));
    edges.entrySet().stream()
                    .flatMap(entry -> entry.getValue().stream().map(endId -> new NonBciEdge(entry.getKey(), endId)))
                    .map(edge -> convertToCSV(String.valueOf(edge.startId), String.valueOf(edge.endId)))
                    .forEach(writer::println);
}

/**
 * Builds the column values describing a virtual method by delegating to
 * {@code resolvedJavaMethodInfo} with a {@code null} id, which leaves the first element of the
 * returned list {@code null}.
 *
 * @param method the target method of a virtual invoke
 * @return the column values produced by {@code resolvedJavaMethodInfo}
 */
private static List<String> virtualMethodInfo(AnalysisMethod method) {
return resolvedJavaMethodInfo(null, method);
}

/**
 * Builds the column values describing a method: id, name, declaring type, parameters, return
 * type and display name.
 *
 * @param id the node id, or {@code null} to leave the first element {@code null}
 * @param method the method to describe
 * @return a fixed-size list of six elements in the order listed above
 */
private static List<String> resolvedJavaMethodInfo(Integer id, ResolvedJavaMethod method) {
    // TODO method parameter types are opaque, but could in the future be split out and linked
    // together, e.g. each method could BELONG to a type, and a method could have PARAMETER
    // relationships with N types.
    // See https://neo4j.com/developer/guide-import-csv/#_converting_data_values_with_load_csv
    // for examples.
    final String parameters;
    if (method.getSignature().getParameterCount(false) > 0) {
        // Commas are stripped from the formatted parameter list, presumably so that the value
        // does not add extra CSV columns.
        parameters = method.format("%P").replace(",", "");
    } else {
        parameters = "empty";
    }

    final String idColumn = id == null ? null : Integer.toString(id);

    // Arrays.asList is used because the id column may be null.
    return Arrays.asList(
                    idColumn,
                    method.getName(),
                    method.getDeclaringClass().toJavaName(true),
                    parameters,
                    method.getSignature().getReturnType(null).toJavaName(true),
                    display(method));
}

/**
 * Builds a display name of the form {@code <type>.<method name>}.
 * <p>
 * When the declaring type's kind is {@code JavaKind.Object}, the type name is abbreviated to the
 * concatenation of all {@code CAMEL_CASE_PATTERN} matches found in it; otherwise the full type
 * name is used.
 *
 * @param method the method to name
 * @return the display name
 */
private static String display(ResolvedJavaMethod method) {
    final ResolvedJavaType declaringType = method.getDeclaringClass();
    final String typeName = declaringType.toJavaName(true);
    if (declaringType.getJavaKind() != JavaKind.Object) {
        return typeName + "." + method.getName();
    }

    final StringBuilder abbreviated = new StringBuilder();
    final Matcher matcher = CAMEL_CASE_PATTERN.matcher(typeName);
    while (matcher.find()) {
        abbreviated.append(matcher.group());
    }
    return abbreviated + "." + method.getName();
}

/**
 * Joins the given fields into one CSV row.
 *
 * @param data the field values, in column order
 * @return the fields separated by commas, each quoted when necessary
 * @see #convertToCSV(List)
 */
private static String convertToCSV(String... data) {
    return convertToCSV(Arrays.asList(data));
}

/**
 * Joins the given fields into one CSV row. Fields containing a comma, a double quote or a line
 * break are wrapped in double quotes with embedded quotes doubled, so that such values cannot
 * shift or split columns. Fields without those characters are written unchanged.
 *
 * @param data the field values, in column order
 * @return the fields separated by commas
 */
private static String convertToCSV(List<String> data) {
    final StringBuilder row = new StringBuilder();
    boolean needsSeparator = false;
    for (String field : data) {
        if (needsSeparator) {
            row.append(',');
        }
        row.append(escapeCsvField(field));
        needsSeparator = true;
    }
    return row.toString();
}

/** Quotes a single CSV field when it contains a comma, double quote, CR or LF. */
private static String escapeCsvField(String field) {
    if (field == null) {
        // Same rendering String.join gives a null element.
        return "null";
    }
    final boolean needsQuoting = field.indexOf(',') >= 0 || field.indexOf('"') >= 0 ||
                    field.indexOf('\n') >= 0 || field.indexOf('\r') >= 0;
    if (!needsQuoting) {
        return field;
    }
    return '"' + field.replace("\"", "\"\"") + '"';
}

/**
 * An edge between two node ids that carries no bytecode index information.
 * Does not override {@code equals} or {@code hashCode}, so instances compare by identity.
 */
private static final class NonBciEdge {

// Id of the node the edge starts at.
final int startId;
// Id of the node the edge ends at.
final int endId;

private NonBciEdge(int startId, int endId) {
this.startId = startId;
this.endId = endId;
}
}

/**
 * An edge from a start node id to a {@code BciEndEdge}, i.e. an end node id plus bytecode
 * indexes. Does not override {@code equals} or {@code hashCode}, so instances compare by
 * identity.
 */
private static final class BciEdge {
// Id of the node the edge starts at.
final int startId;
// End node id together with the bytecode indexes of the call.
final BciEndEdge endEdge;

private BciEdge(int startId, BciEndEdge endEdge) {
this.startId = startId;
this.endEdge = endEdge;
}
}

/**
 * The end of an edge: the id of the target node together with the bytecode indexes of the call.
 * Two instances are equal when both the id and the list of bytecode indexes are equal, which lets
 * sets of edges drop exact duplicates.
 */
private static final class BciEndEdge {
    /** Id of the node the edge ends at. */
    final int id;
    /** Bytecode indexes associated with the call. */
    final List<Integer> bytecodeIndexes;

    private BciEndEdge(int id, List<Integer> bytecodeIndexes) {
        this.id = id;
        this.bytecodeIndexes = bytecodeIndexes;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        // The class is final, so instanceof is equivalent to an exact class comparison.
        if (!(o instanceof BciEndEdge)) {
            return false;
        }
        final BciEndEdge other = (BciEndEdge) o;
        if (id != other.id) {
            return false;
        }
        return bytecodeIndexes.equals(other.bytecodeIndexes);
    }

    @Override
    public int hashCode() {
        return Objects.hash(id, bytecodeIndexes);
    }
}
}

0 comments on commit 140920a

Please sign in to comment.