Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pyk: KCFG stores aliases for nodes #2647

Merged
merged 40 commits into from
Jun 23, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
852f1e5
pyk: kcfg: Add tests for resolving nodes from hashes
nishantjr Jun 1, 2022
a9a42e5
pyk: KCFG: Only allow short hashes with '..' or '...'.
nishantjr Jun 1, 2022
9d8711f
kcfg: Add more tests for bad short hashes.
nishantjr Jun 1, 2022
7b7ffbb
pyk: KCFG: Resolve looks up dict of aliases
nishantjr Jun 1, 2022
f82f8ea
kcfg: Add remove_alias
nishantjr Jun 2, 2022
48665f3
flake8
nishantjr Jun 2, 2022
08b8678
Remove blank line at EOF
nishantjr Jun 7, 2022
30a20ab
Fix incorrect check for short hash with two dots
nishantjr Jun 7, 2022
e902270
kcfg: short_name function for that returns either alias or short_hash…
nishantjr Jun 7, 2022
e9a87f2
kcfg: Remove alias' when node is removed.
nishantjr Jun 7, 2022
498b2d2
kcfg: Use short_names in to_dot, pretty_print
nishantjr Jun 7, 2022
fdba4e8
short_name -> short_id
nishantjr Jun 7, 2022
790eec3
Merge branch 'master' into pyk-kcfg-aliases
nishantjr Jun 8, 2022
55774df
Remove unused parameter to remove_alias
nishantjr Jun 8, 2022
5c6e1a1
Mark nodes that have already had their successors computed as expanded.
nishantjr Jun 10, 2022
dfc9ec5
Merge branch 'master' into pyk-kcfg-aliases
nishantjr Jun 10, 2022
546ddba
Merge branch 'master' into pyk-kcfg-aliases
nishantjr Jun 14, 2022
cb84a94
Better way of getting defaults
nishantjr Jun 20, 2022
19746a6
kcfg.py: Resolve ID before adding to dict
nishantjr Jun 20, 2022
32b0ee1
pycfg: Pretty print: Show dangling nodes as well
nishantjr Jun 21, 2022
774f5d6
Move tests for deconstruct_short_hash to test_utils
nishantjr Jun 21, 2022
9737103
Use assertRaises instead of assertRaisesRegex
nishantjr Jun 21, 2022
aa44ddc
standardize on hashes with exactly three dots
nishantjr Jun 21, 2022
e69db22
kcfg: resolve Aliases must be prefixed with `@'
nishantjr Jun 21, 2022
c826baa
Rename short_names to short_ids. Add docstring
nishantjr Jun 21, 2022
e81fed7
pretty_print: Display all aliases when printing
nishantjr Jun 21, 2022
d94a7f6
Merge branch 'master' into pyk-kcfg-aliases
nishantjr Jun 21, 2022
7140a92
Merge branch 'master' into pyk-kcfg-aliases
nishantjr Jun 21, 2022
9336d8e
add_alias: Throw exception on duplicate, don't allow "@"
nishantjr Jun 22, 2022
4d3459d
remove_alias: Throw if alias does not exist
nishantjr Jun 22, 2022
8c4e860
to_dot: Use node_short_info
nishantjr Jun 22, 2022
4af47ef
Formatting
nishantjr Jun 22, 2022
5099939
Remove now unused short_id/s
nishantjr Jun 22, 2022
89a4c52
More tests for deconstruct_short_hash
nishantjr Jun 22, 2022
6af15d9
_resolve_all => _resolve_hash
nishantjr Jun 22, 2022
3dac0e3
to_dict: Return a copy of _aliases, instead of the reference
nishantjr Jun 22, 2022
9da584c
Use remove_alias() instead of del
nishantjr Jun 22, 2022
d1b1b64
Merge branch 'master' into pyk-kcfg-aliases
nishantjr Jun 22, 2022
69071e4
Merge branch 'master' into pyk-kcfg-aliases
nishantjr Jun 22, 2022
2f026b9
Merge branch 'master' into pyk-kcfg-aliases
nishantjr Jun 23, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 57 additions & 11 deletions pyk/src/pyk/kcfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
substToMlPred,
)
from .ktool import KPrint
from .utils import add_indent, compare_short_hashes, shorten_hashes
from .utils import add_indent, compare_short_hashes, shorten_hash


class KCFG(Container[Union['KCFG.Node', 'KCFG.Edge', 'KCFG.Cover']]):
Expand Down Expand Up @@ -117,6 +117,7 @@ def pretty_print(self, kprint: KPrint) -> List[str]:
_target: Set[str]
_expanded: Set[str]
_verified: Set[Tuple[str, str]]
_aliases: Dict[str, str]
_lock: RLock

def __init__(self):
Expand All @@ -127,6 +128,7 @@ def __init__(self):
self._target = set()
self._expanded = set()
self._verified = set()
self._aliases = dict()
self._lock = RLock()

def __contains__(self, item: object) -> bool:
Expand Down Expand Up @@ -197,6 +199,7 @@ def to_dict(self) -> Dict[str, Any]:
target = sorted(self._target)
expanded = sorted(self._expanded)
verified = [{"source": source_id, "target": target_id} for source_id, target_id in sorted(self._verified)]
aliases = self._aliases
nishantjr marked this conversation as resolved.
Show resolved Hide resolved

res = {
'nodes': nodes,
Expand All @@ -206,6 +209,7 @@ def to_dict(self) -> Dict[str, Any]:
'target': target,
'expanded': expanded,
'verified': verified,
'aliases': aliases
}
return {k: v for k, v in res.items() if v}

Expand Down Expand Up @@ -253,8 +257,37 @@ def resolve(node_id: str) -> str:
for verified_ids in dct.get('verified') or []:
cfg.add_verified(resolve(verified_ids['source']), resolve(verified_ids['target']))

for alias, id in (dct.get('aliases') or {}).items():
nishantjr marked this conversation as resolved.
Show resolved Hide resolved
cfg.add_alias(name=alias, id=id)
nishantjr marked this conversation as resolved.
Show resolved Hide resolved

return cfg

def short_id(self, node: Node) -> str:
nishantjr marked this conversation as resolved.
Show resolved Hide resolved
for alias, hash in self._aliases.items():
if node.id == hash:
return alias
return shorten_hash(node.id)
tothtamas28 marked this conversation as resolved.
Show resolved Hide resolved

def short_names(self, value: Any, leftChars=6, rightChars=6) -> Any:
result: Any = None
if isinstance(value, KCFG.Node):
result = self.short_id(value)
elif type(value) is tuple:
result = tuple([self.short_names(item) for item in value])
elif type(value) is list:
result = [self.short_names(item) for item in value]
elif type(value) is dict:
result = {}
for (k, v) in value.items():
result[self.short_names(k)] = self.short_names(v)
elif type(value) is set:
result = set()
for item in value:
result.add(self.short_names(item))
else:
assert(False)
return result

def to_json(self) -> str:
return json.dumps(self.to_dict(), sort_keys=True)

Expand All @@ -265,7 +298,7 @@ def from_json(s: str) -> 'KCFG':
def node_short_info(self, node: Node) -> str:
attrs = self.node_attrs(node.id)
attr_string = ' (' + ', '.join(attrs) + ')' if attrs else ''
return shorten_hashes(node.id) + attr_string
return self.short_id(node) + attr_string

def pretty_print(self, kprint: KPrint) -> List[str]:

Expand Down Expand Up @@ -322,7 +355,7 @@ def _short_label(label):
for node in self.nodes:
nodeAttrs = self.node_attrs(node.id)
classAttrs = ' '.join(nodeAttrs)
label = shorten_hashes(node.id) + (classAttrs and ' ' + classAttrs)
label = self.short_id(node) + (classAttrs and ' ' + classAttrs)
nishantjr marked this conversation as resolved.
Show resolved Hide resolved
attrs = {'class': classAttrs} if classAttrs else {}
graph.node(name=node.id, label=label, **attrs)

Expand Down Expand Up @@ -351,21 +384,23 @@ def _short_label(label):

return graph.source

def _resolve_all(self, short_id: str) -> List[str]:
return [node_id for node_id in self._nodes if compare_short_hashes(short_id, node_id)]
def _resolve_all(self, id_like: str) -> List[str]:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd call this _resolve_hash, and keep its old behavior.

Then _resolve_or_none can be implemented as follows.

  • If id_like is a hash, then delegate to _resolve_hash, and raise an exception if there are multiple results.
  • Otherwise, we have an alias. If it starts with @, drop the first character.
  • Try to resolve the alias.

As a result, @ is optional, and can be used to enforce strings to be resolved as aliases (as something starting with @ is never a hash). So for example

  • abcd is resolved as a hash
  • @abcd is resolved as alias abcd
  • hello is resolved as alias hello
  • @hello is resolved as alias hello

Optionally, we can also refine _resolve to give a more specific error message if resolution fails:

  • If id_like is a hash: Unknown node hash: ...
  • Otherwise: Unknown alias: ...

What do you think?

Copy link
Contributor Author

@nishantjr nishantjr Jun 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the refactoring. But wasn't the whole point of adding the @ to avoid confusion between the two?

Copy link
Contributor

@tothtamas28 tothtamas28 Jun 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But wasn't the whole point of adding the @ to avoid confusion between the two?

My point is that if id_like is not a valid hash, we can omit the @, because we know we have an alias. But this is just a small usability improvement.

In any case, the input should be validated to be either a hash or an alias (if @ is optional, the latter does not have to be checked).

if id_like in self._aliases:
return [self._aliases[id_like]]
return [node_id for node_id in self._nodes if compare_short_hashes(id_like, node_id)]

def _resolve_or_none(self, short_id: str) -> Optional[str]:
matches = self._resolve_all(short_id)
def _resolve_or_none(self, id_like: str) -> Optional[str]:
matches = self._resolve_all(id_like)
if not matches:
return None
if len(matches) > 1:
raise ValueError(f'Multiple nodes for pattern: {short_id} (matches e.g. {matches[0]} and {matches[1]})')
raise ValueError(f'Multiple nodes for pattern: {id_like} (matches e.g. {matches[0]} and {matches[1]})')
return matches[0]

def _resolve(self, short_id: str) -> str:
match = self._resolve_or_none(short_id)
def _resolve(self, id_like: str) -> str:
match = self._resolve_or_none(id_like)
if not match:
raise ValueError(f'Unknown node: {short_id}')
raise ValueError(f'Unknown node: {id_like}')
return match

def node(self, node_id: str) -> Node:
Expand Down Expand Up @@ -419,6 +454,10 @@ def remove_node(self, node_id: str) -> None:
self._expanded.discard(node_id)
self._verified = set((source_id, target_id) for source_id, target_id in self._verified if source_id != node_id and target_id != node_id)

del_aliases = [alias for alias, id in self._aliases.items() if id == node_id]
for a in del_aliases:
del self._aliases[a]
nishantjr marked this conversation as resolved.
Show resolved Hide resolved

def edge(self, source_id: str, target_id: str) -> Optional[Edge]:
source_id = self._resolve(source_id)
target_id = self._resolve(target_id)
Expand Down Expand Up @@ -537,6 +576,13 @@ def add_verified(self, source_id: str, target_id: str) -> None:
target_id = self._resolve(target_id)
self._verified.add((source_id, target_id))

def add_alias(self, name: str, id: str) -> None:
id = self._resolve(id)
self._aliases[name] = id
nishantjr marked this conversation as resolved.
Show resolved Hide resolved

def remove_alias(self, name: str, alias: str) -> None:
self._aliases.pop(name)

def remove_init(self, node_id: str) -> None:
node_id = self._resolve(node_id)
if node_id not in self._init:
Expand Down
102 changes: 81 additions & 21 deletions pyk/src/pyk/tests/test_kcfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,12 @@
from ..kcfg import KCFG
from ..ktool import KPrint
from ..prelude import token
from ..utils import shorten_hashes


def nid(i: int) -> str:
return node(i).id


def short_id(i: int) -> str:
return shorten_hashes(nid(i))


# over 10 is variables
def term(i: int) -> CTerm:
inside: KInner = token(i)
Expand Down Expand Up @@ -263,11 +258,75 @@ def test_paths_between(self):
},
)

def test_resolve(self):
# Given
d = {
'nodes': node_dicts(4),
'edges': edge_dicts((0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 0)),
}
cfg = KCFG.from_dict(d)

self.assertEqual(node(1), cfg.node('d33...d8'))
self.assertEqual(node(1), cfg.node('d33..d8'))
self.assertEqual(node(1), cfg.node(node(1).id))

# Matches no nodes
with self.assertRaisesRegex(ValueError, 'Unknown node: deadbeef\\.\\.\\.d8'):
nishantjr marked this conversation as resolved.
Show resolved Hide resolved
self.assertEqual(node(1), cfg.node('deadbeef...d8'))

# Bad short hash: Has digits between dots
with self.assertRaisesRegex(ValueError, 'Bad short hash: 3\\.c62e73544\\.\\.\\.'):
cfg.node('3.c62e73544...')
nishantjr marked this conversation as resolved.
Show resolved Hide resolved

# Bad short hash: Has non hex digits
with self.assertRaisesRegex(ValueError, 'Bad short hash: 3\\.\\.\\.XXX'):
cfg.node('3...XXX')

# Bad short hash: Has more than three dots
with self.assertRaisesRegex(ValueError, 'Bad short hash: 3\\.\\.\\.\\.\\.adf'):
cfg.node('3.....adf')

# Matches all nodes
with self.assertRaisesRegex(ValueError, 'Multiple nodes for pattern: ...'):
cfg.node('...')

# Matches node(0) and node(2)
with self.assertRaisesRegex(ValueError, 'Multiple nodes for pattern: ...'):
cfg.node('3...')

def test_aliases(self):
# Given
d = {
'nodes': node_dicts(2),
'edges': edge_dicts((0, 1)),
'aliases': {'foo': nid(1)}
}

cfg = KCFG.from_dict(d)
self.assertEqual(cfg.node('foo'), node(1))
self.assertEqual(cfg.short_id(node(1)), 'foo')

self.assertNotEqual(cfg.short_id(node(0)), 'bar')
cfg.add_alias('bar', node(0).id)
self.assertEqual(cfg.node('bar'), node(0))
cfg.remove_alias('bar', node(0).id)
with self.assertRaisesRegex(ValueError, 'Bad short hash: bar'):
cfg.node('bar')
self.assertNotEqual(cfg.short_id(node(0)), 'bar')

with self.assertRaisesRegex(ValueError, 'Unknown node: '):
cfg.add_alias('buzz', node(3).id)

cfg.remove_node(nid(1))
cfg.create_node(term(1))
self.assertNotEqual(cfg.short_id(node(1)), 'foo')

def test_pretty_print(self):
d = {
'init': [nid(0)],
'target': [nid(6)],
'nodes': node_dicts(12),
'aliases': {'foo': nid(3)},
# Each of the branching edges have given depth=0 # noqa: E131
'edges': edge_dicts((0, 1), (1, 2, 5), (2, 3), # Initial Linear segment
(3, 4, 0), (4, 5), (5, 2), # Loops back
Expand All @@ -276,46 +335,47 @@ def test_pretty_print(self):
(3, 7, 0), (7, 6), # Go to previous terminal node not as loop
(3, 11, 0), (11, 8) # Covered
),
'covers': cover_dicts((8, 11)) # Loops back
'covers': cover_dicts((8, 11)), # Loops back
}
cfg = KCFG.from_dict(d)

print(set(map(lambda node: node.id, cfg.reachable_nodes(nid(5), reverse=True, traverse_covers=True))))
def _short_id(i) -> str:
return cfg.short_id(node(i))

# TODO: Why are all nodes (besides the target) frontiers?
# TODO: Add a cover
nishantjr marked this conversation as resolved.
Show resolved Hide resolved
self.maxDiff = None
actual = '\n'.join(cfg.pretty_print(mock_kprint())) + '\n'
self.assertMultiLineEqual(actual,
f"{short_id(0)} (init, frontier)\n"
f"{_short_id(0)} (init, frontier)\n"
f"│ (1 step)\n"
f"├ {short_id(1)} (frontier)\n"
f"├ {_short_id(1)} (frontier)\n"
f"│ (5 steps)\n"
f"├ {short_id(2)} (frontier)\n"
f"├ {_short_id(2)} (frontier)\n"
f"│ (1 step)\n"
f"├ {short_id(3)} (frontier)\n"
f"┢━ {short_id(4)} (frontier)\n"
f"├ {_short_id(3)} (frontier)\n"
f"┢━ {_short_id(4)} (frontier)\n"
f"┃ │ (1 step)\n"
f"┃ ├ {short_id(5)} (frontier)\n"
f"┃ ├ {_short_id(5)} (frontier)\n"
f"┃ │ (1 step)\n"
f"┃ ├ {short_id(2)} (frontier)\n"
f"┃ ├ {_short_id(2)} (frontier)\n"
f"┃ ┊ (looped back)\n"
f"┃\n"
f"┣━ {short_id(5)} (frontier)\n"
f"┣━ {_short_id(5)} (frontier)\n"
f"┃ ┊ (continues as previously)\n"
f"┃\n"
f"┣━ {short_id(6)} (target, leaf)\n"
f"┣━ {_short_id(6)} (target, leaf)\n"
f"┃\n"
f"┣━ {short_id(7)} (frontier)\n"
f"┣━ {_short_id(7)} (frontier)\n"
f"┃ │ (1 step)\n"
f"┃ └ {short_id(6)} (target, leaf)\n"
f"┃ └ {_short_id(6)} (target, leaf)\n"
f"┃\n"
f"┗━ {short_id(11)} (frontier)\n"
f"┗━ {_short_id(11)} (frontier)\n"
f" │ (1 step)\n"
f" ├ {short_id(8)} (leaf)\n"
f" ├ {_short_id(8)} (leaf)\n"
f" │ constraint: KApply(label=KLabel(name='#Top', params=(KSort(name='GeneratedTopCell'),)), args=())\n"
f" │ subst:\n"
f" │ KApply(label=KLabel(name='#Equals', params=(KSort(name='K'), KSort(name='K'))), args=(KVariable(name='V11'), KToken(token='8', sort=KSort(name='Int'))))\n"
f" ├ {short_id(11)} (frontier)\n"
f" ├ {_short_id(11)} (frontier)\n"
f" ┊ (looped back)\n\n"
)
23 changes: 20 additions & 3 deletions pyk/src/pyk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ def add_indent(indent: str, lines: List[str]) -> List[str]:
return list(map(lambda line: indent + line, lines))


def is_hexstring(x: str) -> bool:
return all(c in string.hexdigits for c in x)


# Hashes

def hash_str(x: Any) -> str:
Expand All @@ -129,7 +133,7 @@ def is_hash(x: Any) -> bool:
# NB! currently only sha256 in hexdec form is detected
# 2b9e b7c5 441e 9f7e 97f9 a4e5 fc04 a0f7 9f62 c8e9 605a ad1e 02db e8de 3c21 0422
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
return type(x) is str and len(x) == 64 and all(c in string.hexdigits for c in x)
return type(x) is str and len(x) == 64 and is_hexstring(x)


def shorten_hash(h: str, leftChars=6, rightChars=6) -> str:
Expand Down Expand Up @@ -159,7 +163,20 @@ def shorten_hashes(value: Any, leftChars=6, rightChars=6) -> Any:
return result


def deconstruct_short_hash(h: str) -> Tuple[str, str]:
nishantjr marked this conversation as resolved.
Show resolved Hide resolved
x = h.lower()
if is_hash(x):
return (x, x)
nishantjr marked this conversation as resolved.
Show resolved Hide resolved
(l, sep, r) = x.partition('...')
if sep == '...' and is_hexstring(l) and is_hexstring(r):
return (l, r)
(l, sep, r) = x.partition('..')
if sep == '..' and is_hexstring(l) and is_hexstring(r):
return (l, r)
raise ValueError(f'Bad short hash: {h}')


def compare_short_hashes(lhs: str, rhs: str):
left, right = lhs.split('.'), rhs.split('.')
(l0, l1, r0, r1) = (left[0].upper(), left[-1].upper(), right[0].upper(), right[-1].upper())
(l0, l1) = deconstruct_short_hash(lhs)
(r0, r1) = deconstruct_short_hash(rhs)
return (l0.startswith(r0) or r0.startswith(l0)) and (l1.endswith(r1) or r1.endswith(l1))