Skip to content

Commit

Permalink
Merge pull request #2 from therkels/mlir
Browse files Browse the repository at this point in the history
Basic CFG Implementation
  • Loading branch information
Jayanaka-98 authored Nov 15, 2024
2 parents 83d192f + 32f59a4 commit efe9817
Show file tree
Hide file tree
Showing 20 changed files with 1,220 additions and 10 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
.jac_mypy_cache/
.envrc
20 changes: 20 additions & 0 deletions cfg.gv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Control Flow Graph
// One node per basic block (bbN); an edge bbX -> bbY means control may pass
// from the end of block X to the start of block Y.
// NOTE(review): bb2 has no outgoing edge and bb6 has an edge to bb7. If this
// graph was generated from the hot_path sample, bb2 probably ends in a forward
// jump whose target edge was lost, and bb6 -> bb7 may be a spurious
// fall-through out of a returning block -- confirm and regenerate.
digraph {
bb0 [label=BB0]
bb1 [label=BB1]
bb2 [label=BB2]
bb3 [label=BB3]
bb4 [label=BB4]
bb5 [label=BB5]
bb6 [label=BB6]
bb7 [label=BB7]
bb0 -> bb7
bb0 -> bb1
bb1 -> bb3
bb1 -> bb2
bb3 -> bb4
bb4 -> bb6
bb4 -> bb5
// Back edge: bb5 returns control to bb1.
bb5 -> bb1
bb6 -> bb7
}
Binary file added cfg.gv.pdf
Binary file not shown.
223 changes: 223 additions & 0 deletions cfg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
"""Generate a control flow graph from generated bytecode.
This pass generates a control flow graph from the bytecode generated by the previous pass.
Code was initially inspired by:
https://bernsteinbear.com/blog/discovering-basic-blocks/
The rest was extended to handle some nuance and edge conditions that were not considered.
"""
import marshal
import dis
from collections import defaultdict
from typing import List, Optional, Iterator
#import graphviz
#from graphviz import Digraph
CODEUNIT_SIZE = 2
class BytecodeOp:
    """A single disassembled bytecode instruction.

    The fields mirror the ``dis.Instruction`` attributes copied over by
    ``disassemble_bytecode``.

    NOTE: for jump instructions ``dis`` stores the already-resolved absolute
    target offset in ``argval``; the raw (possibly relative) operand is ``arg``.
    """

    # Opcode names that ``is_branch`` reports as jumps.
    _BRANCH_OPS = frozenset({
        "JUMP_ABSOLUTE",
        "JUMP_FORWARD",
        "POP_JUMP_IF_TRUE",
        "POP_JUMP_IF_FALSE",
        "POP_JUMP_IF_NONE",
        "POP_JUMP_IF_NOT_NONE",
        "JUMP_IF_TRUE_OR_POP",
        "JUMP_IF_FALSE_OR_POP",
    })
    # Opcode names whose raw operand is encoded relative to the instruction.
    _RELATIVE_BRANCH_OPS = frozenset({"FOR_ITER", "JUMP_FORWARD"})
    # Opcode names that leave the current frame by returning.
    _RETURN_OPS = frozenset({"RETURN_VALUE", "RETURN_CONST"})
    # Opcode names that leave the current block by raising.
    _RAISE_OPS = frozenset({"RAISE_VARARGS", "RERAISE"})

    def __init__(
        self,
        op: str,
        arg: Optional[int],
        offset: int,
        argval: object,
        is_jump_target: bool,
    ) -> None:
        """Store the instruction fields.

        Args:
            op: opcode name, e.g. ``"LOAD_CONST"``.
            arg: raw numeric operand, or ``None`` when the opcode takes none.
            offset: byte offset of the instruction within its code object.
            argval: resolved operand value as reported by ``dis``.
            is_jump_target: whether some other instruction jumps here.
        """
        self.op = op
        self.arg = arg
        self.offset = offset
        self.argval = argval
        self.is_jump_target = is_jump_target

    def __repr__(self) -> str:
        # The original format string printed a stray literal "f" in front of
        # the opcode name; that typo is removed here.
        return f"{self.offset}: {self.op} - {self.arg} - {self.argval}"

    def is_branch(self) -> bool:
        """Return True if this opcode is one of the recognised jump opcodes."""
        return self.op in self._BRANCH_OPS

    def is_relative_branch(self) -> bool:
        """Return True if the opcode encodes its raw operand relatively."""
        return self.op in self._RELATIVE_BRANCH_OPS

    def is_return(self) -> bool:
        """Return True if this opcode returns from the current frame."""
        return self.op in self._RETURN_OPS

    def is_raise(self) -> bool:
        """Return True if this opcode raises an exception."""
        return self.op in self._RAISE_OPS

class Block:
    """A basic block: an id plus the instructions that belong to it."""

    def __init__(self, id: int, instructions: List):
        self.id: int = id
        self.instructions = instructions

    def __repr__(self):
        # Header line "bb<id>:" followed by one line per instruction.
        header = f"bb{self.id}:"
        body = "\n".join(map(str, self.instructions))
        return header + "\n" + body

class BlockMap:
    """Insertion-ordered mapping from block index to ``Block``."""

    def __init__(self) -> None:
        # The original annotation used ``Dict``, which this module never
        # imports; the builtin generic needs no import.
        self.idx_to_block: dict[int, Block] = {}

    def add_block(self, idx, block) -> None:
        """Register ``block`` under ``idx``, replacing any previous entry."""
        self.idx_to_block[idx] = block

    def __repr__(self) -> str:
        # One repr per block, in insertion order, separated by newlines.
        return "\n".join(repr(block) for block in self.idx_to_block.values())

    def __str__(self) -> str:
        return self.__repr__()

def disassemble_bytecode(bytecode: bytes) -> "List[BytecodeOp]":
    """Unmarshal a code object and return its instructions as ``BytecodeOp``s.

    Args:
        bytecode: a ``marshal``-serialised code object. The marshal format is
            specific to the Python version that produced it.

    Returns:
        One ``BytecodeOp`` per instruction, in the order ``dis`` yields them.

    Raises:
        TypeError: if the payload does not unmarshal to a code object.
    """
    # SECURITY: marshal.loads is not safe against malicious data; only feed it
    # bytes produced by a trusted compiler pass.
    code_object = marshal.loads(bytecode)

    # Reject anything that is not a code object up front. Without this check a
    # marshalled ``str`` would be silently compiled as source text by ``dis``.
    if not hasattr(code_object, "co_code"):
        raise TypeError(
            "expected a marshalled code object, got "
            f"{type(code_object).__name__}"
        )

    return [
        BytecodeOp(
            op=instr.opname,
            arg=instr.arg,
            offset=instr.offset,
            argval=instr.argval,
            is_jump_target=instr.is_jump_target,
        )
        for instr in dis.get_instructions(code_object)
    ]

def create_BBs(instructions: "List[BytecodeOp]") -> "BlockMap":
    """Split a linear instruction stream into basic blocks.

    A new block starts at:
      * the first instruction,
      * every instruction flagged ``is_jump_target``,
      * the target of every jump instruction, and
      * the instruction that follows a jump, return or raise.

    Args:
        instructions: instructions in ascending offset order, as produced by
            ``disassemble_bytecode``.

    Returns:
        A ``BlockMap`` whose blocks are numbered 0..n-1 in instruction order.
        It is empty when ``instructions`` is empty.
    """
    block_map = BlockMap()
    if not instructions:
        return block_map

    index_of_offset = {instr.offset: idx for idx, instr in enumerate(instructions)}
    terminators = {"RETURN_VALUE", "RETURN_CONST", "RAISE_VARARGS", "RERAISE"}
    last_index = len(instructions) - 1

    # Block leaders are tracked as indices into ``instructions`` so that only
    # real instructions can ever start a block. Computing "offset + code unit"
    # can land on an inline cache entry or one past the end, neither of which
    # is an instruction.
    leaders = {0}

    for idx, instr in enumerate(instructions):
        if instr.is_jump_target:
            leaders.add(idx)

        is_jump = "JUMP" in instr.op or instr.op == "FOR_ITER"
        if is_jump and isinstance(instr.argval, int):
            # ``dis`` reports ``argval`` of a jump as the resolved absolute
            # target offset, for relative jumps too, so the instruction's own
            # offset must not be added to it.
            target_idx = index_of_offset.get(instr.argval)
            if target_idx is not None:
                leaders.add(target_idx)

        # Whatever follows a control transfer begins a new block.
        if (is_jump or instr.op in terminators) and idx < last_index:
            leaders.add(idx + 1)

    # Each block runs from its leader up to (not including) the next leader.
    boundaries = sorted(leaders) + [len(instructions)]
    for block_id, (start, end) in enumerate(zip(boundaries, boundaries[1:])):
        block_map.add_block(block_id, Block(block_id, instructions[start:end]))

    return block_map


class CFG:
    """Directed control flow graph keyed by basic-block id.

    Attributes:
        nodes: set of every node id known to the graph.
        edges: mapping ``node id -> list of successor ids``, in insertion
            order and without duplicates.

    Node ids are expected to be mutually orderable (block ids are ints) so the
    textual representation can be emitted in a stable order.
    """

    def __init__(self):
        self.nodes = set()
        self.edges = {}

    def add_node(self, node_id):
        """Register ``node_id``; a no-op if it is already present."""
        self.nodes.add(node_id)
        self.edges.setdefault(node_id, [])

    def add_edge(self, from_node, to_node):
        """Add the edge ``from_node -> to_node``.

        Both endpoints are registered as nodes so ``nodes`` and ``edges`` can
        never disagree, and an edge that already exists is not added twice.
        """
        self.add_node(from_node)
        self.add_node(to_node)
        successors = self.edges[from_node]
        if to_node not in successors:
            successors.append(to_node)

    def __repr__(self):
        result = []
        # Sorted so the output does not depend on set iteration order.
        for node in sorted(self.nodes):
            result.append(f'Node bb{node}:')
            result.extend(f' -> bb{succ}' for succ in self.edges.get(node, ()))
        return "\n".join(result)
def create_cfg(block_map: "BlockMap") -> "CFG":
    """Build the control flow graph for the blocks in ``block_map``.

    The successors of a block are decided by its last instruction:
      * return / raise: no successors;
      * unconditional jump: the jump target only;
      * conditional jump (``POP_JUMP_*``, ``JUMP_IF_*``, ``FOR_ITER``): the
        jump target, then the fall-through block;
      * anything else: the fall-through block only.

    Args:
        block_map: blocks as produced by ``create_BBs``.

    Returns:
        A ``CFG`` with one node per block id. Empty blocks become nodes
        without outgoing edges.
    """
    cfg = CFG()
    blocks = block_map.idx_to_block
    terminators = {"RETURN_VALUE", "RETURN_CONST", "RAISE_VARARGS", "RERAISE"}

    # offset -> id of the first block containing that offset, built once
    # instead of rescanning every block for every lookup.
    block_of_offset = {}
    for block_id, block in blocks.items():
        for instr in block.instructions:
            block_of_offset.setdefault(instr.offset, block_id)

    # The fall-through successor is the next block in offset order. Using
    # "last offset + code unit size" breaks when the last instruction is
    # followed by inline cache entries.
    ordered_ids = [
        block_id
        for block_id, block in sorted(
            ((bid, blk) for bid, blk in blocks.items() if blk.instructions),
            key=lambda pair: pair[1].instructions[0].offset,
        )
    ]
    fall_through_of = dict(zip(ordered_ids, ordered_ids[1:]))

    for block_id, block in blocks.items():
        cfg.add_node(block_id)
        if not block.instructions:
            continue

        last_instr = block.instructions[-1]
        op = last_instr.op
        if op in terminators:
            # Control leaves the code object; nothing follows.
            continue

        successors = []
        is_jump = "JUMP" in op or op == "FOR_ITER"
        if is_jump and isinstance(last_instr.argval, int):
            # ``dis`` resolves ``argval`` of a jump to the absolute target
            # offset, so the instruction's own offset must not be added.
            target_block = block_of_offset.get(last_instr.argval)
            if target_block is not None:
                successors.append(target_block)

        # JUMP_FORWARD / JUMP_ABSOLUTE / JUMP_BACKWARD never fall through;
        # JUMP_IF_* and every non-jump instruction can.
        unconditional = op.startswith("JUMP") and not op.startswith("JUMP_IF")
        if not unconditional:
            fall_through_block = fall_through_of.get(block_id)
            if fall_through_block is not None and fall_through_block not in successors:
                successors.append(fall_through_block)

        for successor in successors:
            cfg.add_edge(block_id, successor)

    return cfg

def find_block_by_offset(block_map: "BlockMap", offset: int) -> Optional[int]:
    """Return the id of the first block containing an instruction at ``offset``.

    Args:
        block_map: the blocks to search, in insertion order.
        offset: bytecode offset to look for.

    Returns:
        The matching block id, or ``None`` when no block has an instruction
        at that offset.
    """
    return next(
        (
            block_id
            for block_id, block in block_map.idx_to_block.items()
            if any(instr.offset == offset for instr in block.instructions)
        ),
        None,
    )

# Function to visualize CFG using Graphviz
def visualize_cfg(cfg: "CFG"):
    """Render ``cfg`` as a Graphviz ``Digraph``.

    Every node ``n`` becomes a vertex named ``bb<n>`` labelled ``BB<n>``, and
    every entry in ``cfg.edges`` becomes a directed edge.

    Args:
        cfg: graph exposing ``nodes`` (iterable of ids) and ``edges``
            (mapping id -> list of successor ids).

    Returns:
        The populated ``graphviz.Digraph``; the caller decides whether to
        render it.

    Raises:
        ImportError: if the optional ``graphviz`` package is not installed.
    """
    # The module-level graphviz import is commented out, so ``Digraph`` was an
    # undefined name here. Import it lazily so the rest of the module keeps
    # working without the optional dependency.
    try:
        from graphviz import Digraph
    except ImportError as exc:
        raise ImportError(
            "visualize_cfg requires the optional 'graphviz' package"
        ) from exc

    dot = Digraph(comment="Control Flow Graph")
    for node in cfg.nodes:
        dot.node(f"bb{node}", f"BB{node}")
    for from_node, to_nodes in cfg.edges.items():
        for to_node in to_nodes:
            dot.edge(f"bb{from_node}", f"bb{to_node}")
    return dot

# Sample list of instructions for processing
# NOTE: everything below runs at import time (there is no ``__main__`` guard):
# it unmarshals a hard-coded code object, splits it into basic blocks, builds
# the CFG and prints both.
##simple=
# Marshalled code object; the filename embedded in the payload points at
# examples/ginsScripts/simple.jac.
instructions = disassemble_bytecode(b'c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x01\xf3*\x00\x00\x00\x97\x00d\x00d\x01l\x00m\x01Z\x01\x01\x00d\x00Z\x02d\x00Z\x03e\x03d\x00k\\\x00\x00r\x02d\x02Z\x02d\x03Z\x02y\x04)\x05\xe9\x00\x00\x00\x00)\x01\xda\x0bannotations\xe9\x01\x00\x00\x00\xe9\xff\xff\xff\xffN)\x04\xda\n__future__r\x02\x00\x00\x00\xda\x01a\xda\x01x\xa9\x00\xf3\x00\x00\x00\x00\xfaP/Users/jakobtherkelsen/Documents/jaseci-ginS/jac/examples/ginsScripts/simple.jac\xda\x08<module>r\x0b\x00\x00\x00\x01\x00\x00\x00s%\x00\x00\x00\xf0\x03\x01\x01\x01\xf5\x02\x07\x02\x03\xd8\x05\x06\x801\xd8\x05\x06\x801\xd8\x06\x07\x881\x82f\xd8\x07\x08\x80Q\xe0\x05\x07\x811r\t\x00\x00\x00')
#hot path
# Alternative sample (disabled); its embedded filename points at
# examples/ginsScripts/hot_path.jac. Uncomment to analyse the loop example.
#instructions = disassemble_bytecode(b'c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x01\xf3T\x00\x00\x00\x97\x00d\x00d\x01l\x00m\x01Z\x01\x01\x00d\x00Z\x02d\x00Z\x03e\x02d\x02k\x02\x00\x00r\x19e\x02d\x03z\x06\x00\x00d\x00k(\x00\x00r\x03d\x04Z\x03n\x02d\x03Z\x03e\x02d\x04z\r\x00\x00Z\x02e\x02d\x02k\x02\x00\x00r\x01\x8c\x18y\x05y\x05)\x06\xe9\x00\x00\x00\x00)\x01\xda\x0bannotations\xe9\x0f\x00\x00\x00\xe9\x02\x00\x00\x00\xe9\x01\x00\x00\x00N)\x04\xda\n__future__r\x02\x00\x00\x00\xda\x01a\xda\x01b\xa9\x00\xf3\x00\x00\x00\x00\xfaR/Users/jakobtherkelsen/Documents/jaseci-ginS/jac/examples/ginsScripts/hot_path.jac\xfa\x08<module>r\x0c\x00\x00\x00\x01\x00\x00\x00sD\x00\x00\x00\xf0\x03\x01\x01\x01\xf5\x02\x0c\x02\x03\xd8\x07\x08\x801\xd8\x07\x08\x801\xd8\t\n\x88R\x8a\x16\xd8\x08\t\x88A\x89\x05\x90\x11\x8a\n\xd8\x0b\x0c\x81q\xf0\x06\x00\x0c\r\x80q\xe0\x05\x06\x88!\x81W\x80Q\xf0\x0f\x00\n\x0b\x88R\x8d\x16r\n\x00\x00\x00')
# Partition the instruction stream into basic blocks and dump them.
BBs = create_BBs(instructions)
print(BBs)

# Connect the blocks into a control flow graph and dump it.
cfg = create_cfg(BBs)
print("\nControl Flow Graph (CFG):")
print(cfg)

# Visualize CFG
# Disabled: the graphviz imports at the top of this module are commented out.
# dot = visualize_cfg(cfg)
# dot.render('cfg.gv', view=True)
13 changes: 13 additions & 0 deletions jac/examples/ginsScripts/hot_path.jac
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Loop sample: counts `a` from 0 up to 15 and, on every iteration, sets `b`
# according to the parity of `a`.
with entry {
a = 0;
b = 0;
# The loop body runs while a is below 15.
while a < 15 {
# Even a -> b = 1, odd a -> b = 2.
if a % 2 == 0{
b = 1;
}
else {
b = 2;
}
# Both branches rejoin here before the loop condition is re-tested.
a += 1;
}
}
Binary file added jac/examples/ginsScripts/hot_path_cfg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 8 additions & 0 deletions jac/examples/ginsScripts/simple.jac
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Minimal branching sample: a single `if` followed by an unconditional
# assignment.
with entry {
a=0;
x=0;
# x is 0 at this point, so the condition holds and a is set to 1.
if x >= 0{
a=1;
}
# Executed whether or not the branch above was taken; a ends up as -1.
a=-1;
}
Binary file added jac/examples/ginsScripts/simple_cfg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
11 changes: 6 additions & 5 deletions jac/jaclang/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@
from jaclang.plugin.feature import JacFeature as Jac
from jaclang.runtimelib.constructs import WalkerArchitype
from jaclang.runtimelib.context import ExecutionContext
from jaclang.runtimelib.machine import JacMachine, JacProgram
from jaclang.runtimelib.machine import JacMachine, JacProgram, ShellGhost
from jaclang.utils.helpers import debugger as db
from jaclang.utils.lang_tools import AstTool



Cmd.create_cmd()
Jac.setup()

Expand Down Expand Up @@ -72,7 +73,7 @@ def format_file(filename: str) -> None:

@cmd_registry.register
def run(
filename: str, session: str = "", main: bool = True, cache: bool = True
filename: str, session: str = "", main: bool = True, cache: bool = True, gins: bool = False
) -> None:
"""Run the specified .jac file."""
# if no session specified, check if it was defined when starting the command shell
Expand All @@ -89,8 +90,9 @@ def run(
base, mod = os.path.split(filename)
base = base if base else "./"
mod = mod[:-4]

jctx = ExecutionContext.create(session=session)
if gins:
JacMachine(base).attach_gin(ShellGhost())

if filename.endswith(".jac"):
try:
Expand Down Expand Up @@ -509,7 +511,6 @@ def jac2py(filename: str) -> None:
else:
print("Not a .jac file.", file=sys.stderr)


def start_cli() -> None:
"""
Start the command line interface.
Expand Down Expand Up @@ -539,4 +540,4 @@ def start_cli() -> None:


if __name__ == "__main__":
start_cli()
start_cli()
Loading

0 comments on commit efe9817

Please sign in to comment.