Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of intersection algorithm for regular grammars #34

Open
wants to merge 6 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions src/problems/MultipleSource/algo/matrix_bfs/intersection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
from itertools import product
from typing import Dict

from pyformlang.finite_automaton.state import State
from pyformlang.finite_automaton import EpsilonNFA
from pyformlang.finite_automaton.symbol import Symbol

from pygraphblas.types import BOOL
from pygraphblas.matrix import Matrix
from pygraphblas.vector import Vector
from pygraphblas import descriptor
from pygraphblas import Accum, binaryop

from src.graph.graph import Graph
from src.problems.MultipleSource.algo.matrix_bfs.reg_automaton import RegAutomaton


class Intersection:
    """
    Combine a labelled graph with a regular automaton.

    The class keeps one (initially empty) boolean intersection matrix per
    symbol shared by the graph and the automaton, and provides a
    multiple-source breadth-first traversal (``intersect_bfs``) that works on
    block-diagonal matrices built from both.
    """

    def __init__(self, graph: Graph, regular_automaton: RegAutomaton):
        """
        Store both operands, load the boolean matrices of ``graph`` and
        allocate the per-symbol intersection matrices.
        """
        self.graph = graph
        self.graph.load_bool_graph()
        self.regular_automaton = regular_automaton
        self.intersection_matrices = dict()
        self.__create_intersection_matrices__()

    def __create_intersection_matrices__(self):
        """
        Allocate an empty square boolean matrix of side
        ``|graph vertices| * |automaton states|`` for every automaton symbol
        that is also present in the graph.
        """
        num_vert_graph = self.graph.get_number_of_vertices()
        num_vert_regex = self.regular_automaton.num_states
        num_verts_inter = num_vert_graph * num_vert_regex

        for symbol in self.regular_automaton.matrices:
            if symbol in self.graph:
                self.intersection_matrices[symbol] = Matrix.sparse(
                    BOOL, num_verts_inter, num_verts_inter
                )

    def __to_automaton__(self) -> EpsilonNFA:
        """
        Build an automaton from the intersection matrices.

        Every (graph vertex, automaton start state) pair becomes a start
        state, every (graph vertex, automaton final state) pair becomes a
        final state, and every stored matrix entry becomes a transition
        labelled with the matrix symbol.
        """
        enfa = EpsilonNFA()
        graph_vertices = range(self.graph.get_number_of_vertices())

        for graph_vert, reg_state in product(
            graph_vertices, self.regular_automaton.start_states
        ):
            enfa.add_start_state(State(self.to_inter_coord(graph_vert, reg_state)))

        for graph_vert, reg_state in product(
            graph_vertices, self.regular_automaton.final_states
        ):
            enfa.add_final_state(State(self.to_inter_coord(graph_vert, reg_state)))

        for symbol, matrix in self.intersection_matrices.items():
            for row, col in zip(matrix.rows, matrix.cols):
                enfa.add_transition(State(row), Symbol(symbol), State(col))

        return enfa

    def to_inter_coord(self, graph_vert, reg_vert) -> int:
        """
        Convert a (graph vertex, automaton state) pair into the index of the
        corresponding vertex of the intersection.
        """
        return reg_vert * self.graph.get_number_of_vertices() + graph_vert

    def create_diag_matrices(self) -> Dict[str, Matrix]:
        """
        Create one block-diagonal matrix per shared symbol.

        The automaton matrix occupies the top-left block and the graph matrix
        the bottom-right block of a square matrix whose side is
        ``|automaton states| + |graph vertices|``.
        """
        num_vert_graph = self.graph.get_number_of_vertices()
        num_vert_regex = self.regular_automaton.num_states
        diag_num_verts = num_vert_graph + num_vert_regex

        # NOTE(review): the "- 1" upper bounds assume pygraphblas index slices
        # include their stop value -- confirm against the library version used.
        regex_block = slice(0, num_vert_regex - 1)
        graph_block = slice(num_vert_regex, diag_num_verts - 1)

        diag_matrices = dict()
        for symbol in self.regular_automaton.matrices:
            if symbol in self.graph:
                diag_matrix = Matrix.sparse(BOOL, diag_num_verts, diag_num_verts)
                diag_matrix.assign_matrix(
                    self.regular_automaton.matrices[symbol],
                    regex_block,
                    regex_block,
                )
                diag_matrix.assign_matrix(
                    self.graph[symbol],
                    graph_block,
                    graph_block,
                )

                diag_matrices[symbol] = diag_matrix

        return diag_matrices

    def create_masks_matrix(self) -> Matrix:
        """
        Create the initial mask matrix: an identity over the automaton states
        resized to ``|automaton states|`` rows and
        ``|automaton states| + |graph vertices|`` columns.
        """
        num_vert_graph = self.graph.get_number_of_vertices()
        num_vert_regex = self.regular_automaton.num_states
        num_verts_diag = num_vert_graph + num_vert_regex

        mask_matrix = Matrix.identity(BOOL, num_vert_regex, value=True)
        mask_matrix.resize(num_vert_regex, num_verts_diag)

        return mask_matrix

    def intersect_bfs(self, src_verts) -> Matrix:
        """
        Synchronous multiple-source breadth-first traversal of the graph and
        the regular automaton.

        NOTE(review): a reviewer pointed out that this method solves a
        reachability problem rather than building an automata intersection;
        the name is kept so existing callers keep working.

        :param src_verts: indices of the source vertices of the graph
        :return: boolean ``|graph vertices| x |graph vertices|`` matrix; the
            rows of the source vertices receive the vertices found during the
            traversal (columns of source vertices are masked out)
        """
        num_vert_graph = self.graph.get_number_of_vertices()
        num_vert_regex = self.regular_automaton.num_states

        num_verts_inter = num_vert_graph * num_vert_regex
        num_verts_diag = num_vert_graph + num_vert_regex

        graph = self.graph
        regex = self.regular_automaton.matrices

        regex_start_states = self.regular_automaton.start_states

        result = Matrix.sparse(BOOL, num_vert_graph, num_vert_graph)

        # without source vertices nothing can be written into the result
        if len(src_verts) == 0:
            return result

        diag_matrices = self.create_diag_matrices()

        # create a mask of source vertices vector
        m_src_v = Vector.from_lists(
            src_verts, [True] * len(src_verts), size=num_vert_graph
        )

        # initialize matrices for multiple source bfs
        ident = self.create_masks_matrix()
        vect = ident.dup()
        found = ident.dup()

        # fill start states
        for reg_start_state in regex_start_states:
            for gr_start_state in src_verts:
                found[reg_start_state, num_vert_regex + gr_start_state] = True

        # matrix which contains newly found nodes on each iteration
        found_on_iter = found.dup()

        regex_columns = slice(0, num_vert_regex - 1)
        graph_columns = slice(num_vert_regex, num_verts_diag - 1)

        # Algo's body
        not_empty = True
        level = 0
        while not_empty and level < num_verts_inter:
            # for each symbol we are going to store if any new nodes were
            # found during traversal. if none are found, then 'not_empty'
            # turns False, which means that no matrices change anymore
            # and we can stop the traversal
            not_empty_for_at_least_one_symbol = False

            vect.assign_matrix(found_on_iter, mask=vect, desc=descriptor.RC)
            vect.assign_scalar(True, mask=ident)

            # stores found nodes for each symbol
            found_on_iter.assign_matrix(ident)

            for symbol in regex:
                if symbol in graph:
                    with BOOL.ANY_PAIR:
                        found = vect.mxm(diag_matrices[symbol])

                    with Accum(binaryop.MAX_BOOL):
                        # extract left (grammar) part of the masks matrix
                        # and rearrange rows
                        # NOTE(review): a reviewer asked whether
                        # extract_matrix can be replaced with index slicing.
                        i_x, i_y, _ = found.extract_matrix(
                            col_index=regex_columns
                        ).to_lists()
                        for src_row, dst_row in zip(i_x, i_y):
                            found_on_iter.assign_row(
                                dst_row, found.extract_row(src_row)
                            )

                        # check if new nodes were found.
                        # if positive, switch the flag
                        if not found_on_iter.iseq(vect):
                            not_empty_for_at_least_one_symbol = True

            # extract right (graph) part of the masks matrix and get a row of
            # reachable nodes in a graph
            reachable = found_on_iter.extract_matrix(
                col_index=graph_columns
            ).T.reduce_vector(BOOL.ANY_MONOID)  # reduce by columns

            # update graph boolean matrix for every source vertex
            # result matrix contains reachability for every symbol combined
            with Accum(binaryop.MAX_BOOL):
                for st_v in src_verts:
                    result.assign_row(
                        st_v, reachable, mask=m_src_v, desc=descriptor.C
                    )

            not_empty = not_empty_for_at_least_one_symbol
            level += 1

        return result
31 changes: 31 additions & 0 deletions src/problems/MultipleSource/algo/matrix_bfs/matrix_bfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from pyformlang.cfg import CFG

from src.grammar.rsa import RecursiveAutomaton

from src.graph.graph import Graph
from src.graph.label_graph import LabelGraph

from src.problems.AllPaths.AllPaths import AllPathsProblem
from src.problems.utils import ResultAlgo


class ProblemAlgo(AllPathsProblem):
    """
    Placeholder for the matrix BFS algorithm on context-free grammars.

    Only the regular-grammar variant exists for now, so every method below
    is an empty stub that still has to be implemented for CFG.
    """

    def prepare(self, graph: Graph, grammar: CFG):
        """Not implemented yet: does nothing and returns None."""

    def prepare_for_solve(self):
        """Not implemented yet: does nothing and returns None."""

    def solve(self):
        """Not implemented yet: does nothing and returns None."""

    def prepare_for_exctract_paths(self):
        """Not implemented yet: does nothing and returns None."""

    def getPaths(self, v_start: int, v_finish: int, nonterminal: str, max_len: int):
        """Not implemented yet: does nothing and returns None."""
49 changes: 49 additions & 0 deletions src/problems/MultipleSource/algo/matrix_bfs/reg_automaton.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from __future__ import annotations

from pyformlang.regular_expression.regex import Regex

from pygraphblas.matrix import Matrix
from pygraphblas.types import BOOL


class RegAutomaton:
    """
    Automata representation of regular grammar.

    The regular expression is converted to an epsilon-NFA and minimized; its
    states are numbered and one boolean adjacency matrix is kept per symbol.
    """

    def __init__(self, regex: Regex):
        """
        Build the minimized automaton of ``regex``, enumerate its states and
        fill the per-symbol boolean matrices.
        """
        self.enfa = regex.to_epsilon_nfa().minimize()

        self.states = self.enfa.states
        self.num_states = len(self.states)

        # state -> matrix index
        self.enum_states = dict(zip(self.states, range(self.num_states)))
        self.start_states = [
            self.enum_states[state] for state in self.enfa.start_states
        ]
        self.final_states = [
            self.enum_states[state] for state in self.enfa.final_states
        ]

        self.matrices = dict()
        self.load_bool_matrices()

    @classmethod
    def from_regex_txt(cls, path) -> RegAutomaton:
        """
        Create an automaton from the regular expression stored on the first
        line of the text file at ``path``.

        Declared as a classmethod so it can be called on the class or on an
        instance. The line is stripped so the trailing newline does not
        reach the regex parser.
        """
        with open(path, "r") as file:
            regex = Regex(file.readline().strip())

        return cls(regex)

    def load_bool_matrices(self) -> None:
        """
        Creates boolean matrices for self automata
        """
        # NOTE(review): assumes to_dict() maps every (state, symbol) pair to a
        # single target state, as expected for the minimized automaton --
        # confirm against the pyformlang version used.
        for src_node, transition in self.enfa.to_dict().items():
            for symbol, tgt_node in transition.items():
                if symbol not in self.matrices:
                    self.matrices[symbol] = Matrix.sparse(
                        BOOL, self.num_states, self.num_states
                    )

                matr = self.matrices[symbol]
                matr[self.enum_states[src_node], self.enum_states[tgt_node]] = True
93 changes: 93 additions & 0 deletions test/MultipleSource/test_bfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import pytest

from src.graph.graph import Graph

from src.problems.MultipleSource.algo.matrix_bfs.intersection import Intersection
from src.problems.MultipleSource.algo.matrix_bfs.reg_automaton import RegAutomaton

from src.utils.useful_paths import LOCAL_CFPQ_DATA


@pytest.mark.CI
def test_case_regular_cycle():
    """
    Cycle fixture (0 -a-> 1 -a-> 2 -a-> 0) queried with the regex ``a a*``
    from source vertex 0: two result entries are expected per source.
    """
    data_dir = LOCAL_CFPQ_DATA.joinpath("regular/cycle")
    graph_file = data_dir.joinpath("Graphs/graph_1.txt")
    regex_file = data_dir.joinpath("Grammars/regex_1.txt")

    labelled_graph = Graph.from_txt(graph_file)
    automaton = RegAutomaton.from_regex_txt(regex_file)

    sources = [0]
    reachability = Intersection(labelled_graph, automaton).intersect_bfs(sources)

    expected_entries = 2 * len(sources)
    assert reachability.nvals == expected_entries


@pytest.mark.CI
def test_case_regular_disconnected():
    """
    Disconnected fixture queried with the regex ``a* b (a|b)*`` from source
    vertices 0 and 3: two result entries are expected per source.
    """
    data_dir = LOCAL_CFPQ_DATA.joinpath("regular/disconnected")
    graph_file = data_dir.joinpath("Graphs/graph_1.txt")
    regex_file = data_dir.joinpath("Grammars/regex_1.txt")

    labelled_graph = Graph.from_txt(graph_file)
    automaton = RegAutomaton.from_regex_txt(regex_file)

    sources = [0, 3]
    reachability = Intersection(labelled_graph, automaton).intersect_bfs(sources)

    expected_entries = 2 * len(sources)
    assert reachability.nvals == expected_entries
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we check not only size of result, but also its content?



@pytest.mark.CI
def test_case_regular_loop():
    """
    Loop fixture queried from source vertices 0 and 2: the result matrix is
    expected to hold no entries.
    """
    data_dir = LOCAL_CFPQ_DATA.joinpath("regular/loop")
    graph_file = data_dir.joinpath("Graphs/graph_1.txt")
    regex_file = data_dir.joinpath("Grammars/regex_1.txt")

    labelled_graph = Graph.from_txt(graph_file)
    automaton = RegAutomaton.from_regex_txt(regex_file)

    sources = [0, 2]
    reachability = Intersection(labelled_graph, automaton).intersect_bfs(sources)

    expected_entries = 0 * len(sources)
    assert reachability.nvals == expected_entries


@pytest.mark.CI
def test_case_regular_midsymbol():
    """
    Midsymbol fixture queried from source vertex 0: one result entry is
    expected per source.
    """
    data_dir = LOCAL_CFPQ_DATA.joinpath("regular/midsymbol")
    graph_file = data_dir.joinpath("Graphs/graph_1.txt")
    regex_file = data_dir.joinpath("Grammars/regex_1.txt")

    labelled_graph = Graph.from_txt(graph_file)
    automaton = RegAutomaton.from_regex_txt(regex_file)

    sources = [0]
    reachability = Intersection(labelled_graph, automaton).intersect_bfs(sources)

    expected_entries = 1 * len(sources)
    assert reachability.nvals == expected_entries


@pytest.mark.CI
def test_case_regular_two_cycles():
    """
    Two-cycles fixture queried from source vertices 0 and 3: two result
    entries are expected per source.
    """
    data_dir = LOCAL_CFPQ_DATA.joinpath("regular/two_cycles")
    graph_file = data_dir.joinpath("Graphs/graph_1.txt")
    regex_file = data_dir.joinpath("Grammars/regex_1.txt")

    labelled_graph = Graph.from_txt(graph_file)
    automaton = RegAutomaton.from_regex_txt(regex_file)

    sources = [0, 3]
    reachability = Intersection(labelled_graph, automaton).intersect_bfs(sources)

    expected_entries = 2 * len(sources)
    assert reachability.nvals == expected_entries
1 change: 1 addition & 0 deletions test/data/regular/cycle/Grammars/regex_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a a*
3 changes: 3 additions & 0 deletions test/data/regular/cycle/Graphs/graph_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
0 a 1
1 a 2
2 a 0
1 change: 1 addition & 0 deletions test/data/regular/disconnected/Grammars/regex_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a* b (a|b)*
8 changes: 8 additions & 0 deletions test/data/regular/disconnected/Graphs/graph_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
0 a 1
0 b 0
1 a 1
1 b 2
2 a 2
2 b 2
3 a 3
3 b 3
Loading