Skip to content

Commit fb5b464

Browse files
committed
Add from_gql_create constructor
1 parent c9ce0b5 commit fb5b464

File tree

3 files changed

+340
-0
lines changed

3 files changed

+340
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Import from a GQL CREATE query
2+
------------------------------
3+
4+
.. automodule:: neo4j_viz.gql_create
5+
:members:
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
import re
2+
import uuid
3+
from typing import Any, Optional
4+
5+
from neo4j_viz import Node, Relationship, VisualizationGraph
6+
7+
8+
def _parse_value(value_str: str) -> Any:
9+
value_str = value_str.strip()
10+
if not value_str:
11+
return None
12+
13+
# Parse object
14+
if value_str.startswith("{") and value_str.endswith("}"):
15+
inner = value_str[1:-1].strip()
16+
result = {}
17+
depth = 0
18+
in_string = None
19+
start_idx = 0
20+
for i, ch in enumerate(inner):
21+
if in_string is None:
22+
if ch in ["'", '"']:
23+
in_string = ch
24+
elif ch in ["{", "["]:
25+
depth += 1
26+
elif ch in ["}", "]"]:
27+
depth -= 1
28+
elif ch == "," and depth == 0:
29+
segment = inner[start_idx:i].strip()
30+
if ":" not in segment:
31+
return None
32+
k, v = segment.split(":", 1)
33+
k = k.strip().strip("'\"")
34+
result[k] = _parse_value(v)
35+
start_idx = i + 1
36+
else:
37+
if ch == in_string:
38+
in_string = None
39+
if inner[start_idx:]:
40+
segment = inner[start_idx:].strip()
41+
if ":" not in segment:
42+
return None
43+
k, v = segment.split(":", 1)
44+
k = k.strip().strip("'\"")
45+
result[k] = _parse_value(v)
46+
return result
47+
48+
# Parse list
49+
if value_str.startswith("[") and value_str.endswith("]"):
50+
inner = value_str[1:-1].strip()
51+
items = []
52+
depth = 0
53+
in_string = None
54+
start_idx = 0
55+
for i, ch in enumerate(inner):
56+
if in_string is None:
57+
if ch in ["'", '"']:
58+
in_string = ch
59+
elif ch in ["{", "["]:
60+
depth += 1
61+
elif ch in ["}", "]"]:
62+
depth -= 1
63+
elif ch == "," and depth == 0:
64+
items.append(_parse_value(inner[start_idx:i]))
65+
start_idx = i + 1
66+
else:
67+
if ch == in_string:
68+
in_string = None
69+
if inner[start_idx:]:
70+
items.append(_parse_value(inner[start_idx:]))
71+
return items
72+
73+
# Parse boolean, float, int, or string
74+
if re.match(r"^-?\d+$", value_str):
75+
return int(value_str)
76+
if re.match(r"^-?\d+\.\d+$", value_str):
77+
return float(value_str)
78+
if value_str.lower() == "true":
79+
return True
80+
if value_str.lower() == "false":
81+
return False
82+
if value_str.lower() == "null":
83+
return None
84+
return value_str.strip("'\"")
85+
86+
87+
def _get_snippet(q: str, idx: int, context: int = 15) -> str:
88+
start = max(0, idx - context)
89+
end = min(len(q), idx + context)
90+
return q[start:end].replace("\n", " ")
91+
92+
93+
def from_gql_create(query: str) -> VisualizationGraph:
    """
    Parse a GQL CREATE query and return a VisualizationGraph object representing the graph it creates.

    Please note that this function is not a full GQL parser, it only handles CREATE queries that do not contain
    other clauses like MATCH, WHERE, RETURN, etc, or any Cypher function calls.
    It also does not handle all possible GQL syntax, but it should work for most common cases.

    Known limitations: only directed relationships of the form ``(a)-[:TYPE {props}]->(b)`` between
    previously created node aliases are recognised, relationship property maps cannot contain nested
    maps, and a ``)`` inside a string literal of a node pattern is not supported.

    Parameters
    ----------
    query : str
        The GQL CREATE query to parse. The leading ``CREATE`` keyword is matched case-insensitively and
        a trailing ``;`` is ignored.

    Returns
    -------
    VisualizationGraph
        A graph with one node per node pattern (labels stored under the ``__labels`` property) and one
        relationship per relationship pattern (type stored under the ``__type`` property).

    Raises
    ------
    ValueError
        If parentheses are unbalanced, a property entry lacks a ``:`` separator, a relationship refers
        to a node alias that has not been created, or an element is neither a node nor a relationship.
    """

    def parse_prop_str(prop_str: str, prop_start: int, props: dict[str, Any]) -> None:
        # Parse the `key: value` pairs of `prop_str` into `props`. `prop_start` is the offset of
        # `prop_str` inside `query`; it is only used to build error snippets.

        def add_pair(start: int, end: int) -> None:
            key, sep, raw_value = prop_str[start:end].strip().partition(":")
            if not sep:
                snippet = _get_snippet(query, prop_start + start)
                raise ValueError(f"Property syntax error near: `{snippet}`.")
            props[key.strip().strip("'\"")] = _parse_value(raw_value)

        depth = 0
        in_string = None
        escaped = False
        start_idx = 0
        for i, ch in enumerate(prop_str):
            if in_string is not None:
                # Inside a string only the matching, unescaped quote is significant.
                if escaped:
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == in_string:
                    in_string = None
            elif ch in ("'", '"'):
                in_string = ch
            elif ch in ("{", "["):
                depth += 1
            elif ch in ("}", "]"):
                depth -= 1
            elif ch == "," and depth == 0:
                add_pair(start_idx, i)
                start_idx = i + 1
        if prop_str[start_idx:]:
            add_pair(start_idx, len(prop_str))

    def parse_labels_and_props(s: str) -> tuple[Optional[str], dict[str, Any]]:
        # Split the inside of a node pattern (`alias:Label1:Label2 {props}`) into its alias
        # (None when anonymous) and a property dict that also carries the sorted labels.
        props: dict[str, Any] = {}
        prop_str = ""
        prop_start = 0
        # DOTALL so that a property map spanning several lines is still recognised.
        prop_match = re.search(r"\{(.*)\}", s, re.DOTALL)
        if prop_match:
            prop_str = prop_match.group(1)
            prop_start = query.index(prop_str, query.index(s))
            s = s[: prop_match.start()].strip()
        alias_labels = re.split(r"[:&]", s)
        raw_alias = alias_labels[0].strip()
        final_alias = raw_alias if raw_alias else None

        label_list = [lbl.strip() for lbl in alias_labels[1:]]
        props["__labels"] = sorted(label_list)

        if prop_str:
            parse_prop_str(prop_str, prop_start, props)
        return final_alias, props

    nodes = []
    relationships = []
    alias_to_id: dict[str, str] = {}
    anonymous_count = 0

    # Drop the leading CREATE keyword (Cypher/GQL keywords are case-insensitive) and a trailing `;`.
    query = query.strip()
    if query[:6].upper() == "CREATE":
        query = query[6:]
    query = query.rstrip(";").strip()

    # Split the pattern list on top-level commas. Commas inside parentheses, inside the
    # `[...]` / `{...}` of a relationship, or inside string literals do not separate elements.
    parts = []
    current: list[str] = []
    bracket_level = 0
    nesting = 0
    in_string = None
    escaped = False
    for i, char in enumerate(query):
        if in_string is not None:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == in_string:
                in_string = None
        elif char in ("'", '"'):
            in_string = char
        elif char == "(":
            bracket_level += 1
        elif char == ")":
            bracket_level -= 1
            if bracket_level < 0:
                snippet = _get_snippet(query, i)
                raise ValueError(f"Unbalanced parentheses near: `{snippet}`.")
        elif char in ("[", "{"):
            nesting += 1
        elif char in ("]", "}"):
            # Clamp so a stray closer cannot suppress later top-level splits.
            nesting = max(0, nesting - 1)
        elif char == "," and bracket_level == 0 and nesting == 0:
            parts.append("".join(current).strip())
            current = []
            continue
        current.append(char)
    parts.append("".join(current).strip())
    if bracket_level != 0:
        snippet = _get_snippet(query, len(query) - 1)
        raise ValueError(f"Unbalanced parentheses near: `{snippet}`.")

    node_pattern = re.compile(r"^\(([^)]+)\)$")
    rel_pattern = re.compile(r"^\(([^)]+)\)-\s*\[\s*:(\w+)\s*(\{[^}]*\})?\s*\]->\(([^)]+)\)$")

    for part in parts:
        node_m = node_pattern.match(part)
        if node_m:
            alias, props = parse_labels_and_props(node_m.group(1).strip())
            if not alias:
                alias = f"_anon_{anonymous_count}"
                anonymous_count += 1
            if alias not in alias_to_id:
                alias_to_id[alias] = str(uuid.uuid4())
            nodes.append(Node(id=alias_to_id[alias], properties=props))
            continue

        rel_m = rel_pattern.match(part)
        if not rel_m:
            snippet = part[:30]
            raise ValueError(f"Invalid element in CREATE near: `{snippet}`.")

        left_node = rel_m.group(1).strip()
        rel_type = rel_m.group(2).strip()
        right_node = rel_m.group(4).strip()

        # Both endpoints must refer to aliases of nodes created earlier in the query.
        left_alias, _ = parse_labels_and_props(left_node)
        if not left_alias or left_alias not in alias_to_id:
            snippet = _get_snippet(query, query.index(left_node))
            raise ValueError(f"Relationship references unknown node alias: '{left_alias}' near: `{snippet}`.")

        right_alias, _ = parse_labels_and_props(right_node)
        if not right_alias or right_alias not in alias_to_id:
            snippet = _get_snippet(query, query.index(right_node))
            raise ValueError(f"Relationship references unknown node alias: '{right_alias}' near: `{snippet}`.")

        rel_props: dict[str, Any] = {"__type": rel_type}
        rel_props_str = rel_m.group(3) or ""
        if rel_props_str:
            # Group 3 always starts with `{` and ends with `}`; drop exactly that pair.
            inner_str = rel_props_str[1:-1].strip()
            parse_prop_str(inner_str, query.index(inner_str), rel_props)

        relationships.append(
            Relationship(
                id=str(uuid.uuid4()),
                source=alias_to_id[left_alias],
                target=alias_to_id[right_alias],
                properties=rel_props,
            )
        )

    return VisualizationGraph(nodes=nodes, relationships=relationships)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import pytest
2+
3+
from neo4j_viz.gql_create import from_gql_create
4+
5+
6+
def test_from_gql_create() -> None:
    """End-to-end check: nodes, labels, nested property values and relationships are parsed."""
    query = """
    CREATE
      (a:User {name: 'Alice', age: 23}),
      (b:User:person {name: "Bridget", age: 34}),
      (wizardMan:User {name: 'Charles: The wizard, man', hello: true, height: NULL}),
      (d:User),

      (a)-[:LINK {weight: 0.5}]->(b),

      (e:User {age: 67, my_map: {key: 'value', key2: 3.14, key3: [1, 2, 3], key4: {a: 1, b: null}}}),
      (:User {age: 42, pets: ['cat', false, 'dog']}),
      (f:User&Person {name: 'Fawad', age: 78}),

      (a)-[:LINK {weight: 4}]->(wizardMan),
      (e)-[:LINK]->(d),
      (e)-[:OTHER_LINK {weight: -2}]->(f);
    """

    # Expected node properties, in creation order.
    expected_node_properties = [
        {"name": "Alice", "age": 23, "__labels": ["User"]},
        {"name": "Bridget", "age": 34, "__labels": ["User", "person"]},
        {"name": "Charles: The wizard, man", "hello": True, "height": None, "__labels": ["User"]},
        {"__labels": ["User"]},
        {
            "age": 67,
            "my_map": {"key": "value", "key2": 3.14, "key3": [1, 2, 3], "key4": {"a": 1, "b": None}},
            "__labels": ["User"],
        },
        {"age": 42, "pets": ["cat", False, "dog"], "__labels": ["User"]},
        {"name": "Fawad", "age": 78, "__labels": ["Person", "User"]},
    ]

    # Expected relationships as (source node index, target node index, properties).
    expected_relationships = [
        (0, 1, {"weight": 0.5, "__type": "LINK"}),
        (0, 2, {"weight": 4, "__type": "LINK"}),
        (4, 3, {"__type": "LINK"}),
        (4, 6, {"weight": -2, "__type": "OTHER_LINK"}),
    ]

    graph = from_gql_create(query)

    assert len(graph.nodes) == len(expected_node_properties)
    for node, expected_properties in zip(graph.nodes, expected_node_properties):
        assert node.properties == expected_properties

    assert len(graph.relationships) == len(expected_relationships)
    for rel, (source_idx, target_idx, expected_properties) in zip(graph.relationships, expected_relationships):
        assert rel.source == graph.nodes[source_idx].id
        assert rel.target == graph.nodes[target_idx].id
        assert rel.properties == expected_properties
61+
62+
63+
def test_unbalanced_parentheses_snippet() -> None:
    """An unclosed node pattern is reported together with a snippet of the query."""
    expected_message = r"Unbalanced parentheses near: `.*\(b:User.*"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a:User, (b:User })")
67+
68+
69+
def test_node_property_syntax_error_snippet1() -> None:
    """A node property without a `:` separator raises with a snippet of the query."""
    expected_message = r"Property syntax error near: `.*x, y.*"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a:User {x, y:4})")
73+
74+
75+
def test_node_property_syntax_error_snippet2() -> None:
    """An empty entry between two commas in a node property map raises with a snippet."""
    expected_message = r"Property syntax error near: `.*x:5,, y.*"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a:User {x:5,, y:4})")
79+
80+
81+
def test_invalid_element_in_create_snippet() -> None:
    """An element that is neither a node nor a relationship pattern is rejected."""
    expected_message = r"Invalid element in CREATE near: `\[not_a_node.*"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE [not_a_node]")
85+
86+
87+
def test_rel_property_syntax_error_snippet() -> None:
    """A relationship property without a `:` separator raises with the surrounding snippet."""
    expected_message = r"Property syntax error near: `\), \(a\)-\[:LINK {weight0.5}\]->\(b`."
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a:User), (b:User), (a)-[:LINK {weight0.5}]->(b)")
91+
92+
93+
def test_unknown_node_alias() -> None:
    """A relationship whose endpoint alias was never created is rejected."""
    expected_message = r"Relationship references unknown node alias: 'a' near: `\(a\)-\[:LINK {weig`"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a)-[:LINK {weight0.5}]->(b)")

0 commit comments

Comments
 (0)