Skip to content

Commit 9779062

Browse files
committed
Add from_gql_create constructor
1 parent c9ce0b5 commit 9779062

File tree

3 files changed

+355
-0
lines changed

3 files changed

+355
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Import from a GQL CREATE query
2+
------------------------------------------------
3+
4+
.. automodule:: neo4j_viz.gql_create
5+
:members:
Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
import re
2+
import uuid
3+
from typing import Any, Optional
4+
5+
from neo4j_viz import Node, Relationship, VisualizationGraph
6+
7+
8+
def _parse_value(value_str: str) -> Any:
9+
value_str = value_str.strip()
10+
if not value_str:
11+
return None
12+
13+
# Parse object
14+
if value_str.startswith("{") and value_str.endswith("}"):
15+
inner = value_str[1:-1].strip()
16+
result = {}
17+
depth = 0
18+
in_string = None
19+
start_idx = 0
20+
for i, ch in enumerate(inner):
21+
if in_string is None:
22+
if ch in ["'", '"']:
23+
in_string = ch
24+
elif ch in ["{", "["]:
25+
depth += 1
26+
elif ch in ["}", "]"]:
27+
depth -= 1
28+
elif ch == "," and depth == 0:
29+
segment = inner[start_idx:i].strip()
30+
if ":" not in segment:
31+
return None
32+
k, v = segment.split(":", 1)
33+
k = k.strip().strip("'\"")
34+
result[k] = _parse_value(v)
35+
start_idx = i + 1
36+
else:
37+
if ch == in_string:
38+
in_string = None
39+
if inner[start_idx:]:
40+
segment = inner[start_idx:].strip()
41+
if ":" not in segment:
42+
return None
43+
k, v = segment.split(":", 1)
44+
k = k.strip().strip("'\"")
45+
result[k] = _parse_value(v)
46+
return result
47+
48+
# Parse list
49+
if value_str.startswith("[") and value_str.endswith("]"):
50+
inner = value_str[1:-1].strip()
51+
items = []
52+
depth = 0
53+
in_string = None
54+
start_idx = 0
55+
for i, ch in enumerate(inner):
56+
if in_string is None:
57+
if ch in ["'", '"']:
58+
in_string = ch
59+
elif ch in ["{", "["]:
60+
depth += 1
61+
elif ch in ["}", "]"]:
62+
depth -= 1
63+
elif ch == "," and depth == 0:
64+
items.append(_parse_value(inner[start_idx:i]))
65+
start_idx = i + 1
66+
else:
67+
if ch == in_string:
68+
in_string = None
69+
if inner[start_idx:]:
70+
items.append(_parse_value(inner[start_idx:]))
71+
return items
72+
73+
# Parse boolean, float, int, or string
74+
if re.match(r"^-?\d+$", value_str):
75+
return int(value_str)
76+
if re.match(r"^-?\d+\.\d+$", value_str):
77+
return float(value_str)
78+
if value_str.lower() == "true":
79+
return True
80+
if value_str.lower() == "false":
81+
return False
82+
if value_str.lower() == "null":
83+
return None
84+
return value_str.strip("'\"")
85+
86+
87+
def _get_snippet(q: str, idx: int, context: int = 15) -> str:
88+
start = max(0, idx - context)
89+
end = min(len(q), idx + context)
90+
return q[start:end].replace("\n", " ")
91+
92+
93+
def from_gql_create(query: str) -> VisualizationGraph:
    """
    Parse a GQL CREATE query and return a VisualizationGraph object representing the graph it creates.

    Please note that this function is not a full GQL parser, it only handles CREATE queries that do not contain
    other clauses like MATCH, WHERE, RETURN, etc, or any Cypher function calls.
    It also does not handle all possible GQL syntax, but it should work for most common cases.

    Node labels are stored in the ``__labels`` property (sorted) and the relationship type in the
    ``__type`` property. Relationships may only reference node aliases created earlier in the query.

    Parameters
    ----------
    query : str
        The GQL CREATE query to parse

    Returns
    -------
    VisualizationGraph
        A graph holding one node per node pattern and one relationship per relationship pattern,
        in the order they appear in the query.

    Raises
    ------
    ValueError
        If the query does not start with ``CREATE``, has unbalanced parentheses, contains a property
        without a ``:`` separator, contains a relationship whose endpoint alias has not been created,
        or contains an element that is neither a node nor a relationship pattern.
    """

    query = query.strip()
    # Case-insensitive check that 'CREATE' is the first non-whitespace token
    if not re.match(r"(?i)^create\b", query):
        raise ValueError("Query must begin with 'CREATE' (case insensitive).")

    # From here on every offset refers to the query without the keyword and the trailing ';'.
    query = re.sub(r"(?i)^create\s*", "", query, count=1).rstrip(";").strip()

    def parse_prop_str(prop_str: str, prop_start: int, props: dict[str, Any]) -> None:
        # Parse "k: v, k2: v2" into `props`; `prop_start` is the offset of `prop_str` in `query`.
        def add_pair(begin: int, end: int) -> None:
            pair = prop_str[begin:end].strip()
            if ":" not in pair:
                snippet = _get_snippet(query, prop_start + begin)
                raise ValueError(f"Property syntax error near: `{snippet}`.")
            k, v = pair.split(":", 1)
            props[k.strip().strip("'\"")] = _parse_value(v)

        depth = 0
        in_string = None
        start_idx = 0
        for i, ch in enumerate(prop_str):
            if in_string is not None:
                if ch == in_string:
                    in_string = None
            elif ch in ("'", '"'):
                in_string = ch
            elif ch in ("{", "["):
                depth += 1
            elif ch in ("}", "]"):
                depth -= 1
            elif ch == "," and depth == 0:
                # Only commas outside strings and nested maps/lists separate properties.
                add_pair(start_idx, i)
                start_idx = i + 1
        if prop_str[start_idx:]:
            add_pair(start_idx, len(prop_str))

    def parse_labels_and_props(s: str, s_start: int) -> tuple[Optional[str], dict[str, Any]]:
        # Split "alias:Label1&Label2 {props}" into the alias (or None) and the property dict.
        # `s_start` is the offset of `s` in `query`, used to locate error snippets.
        props: dict[str, Any] = {}
        prop_str = ""
        prop_start = 0
        # DOTALL lets the property map span several lines; the greedy match keeps nested maps intact.
        prop_match = re.search(r"\{(.*)\}", s, re.DOTALL)
        if prop_match:
            prop_str = prop_match.group(1)
            prop_start = s_start + prop_match.start(1)
            s = s[: prop_match.start()].strip()
        alias_labels = re.split(r"[:&]", s)
        raw_alias = alias_labels[0].strip()
        final_alias = raw_alias if raw_alias else None

        props["__labels"] = sorted(lbl.strip() for lbl in alias_labels[1:])

        if prop_str:
            parse_prop_str(prop_str, prop_start, props)
        return final_alias, props

    def stripped_group(match: "re.Match[str]", group: int, base: int) -> tuple[str, int]:
        # Return a regex group without surrounding whitespace plus its offset in `query`.
        raw = match.group(group)
        leading = len(raw) - len(raw.lstrip())
        return raw.strip(), base + match.start(group) + leading

    nodes = []
    relationships = []
    alias_to_id: dict[str, str] = {}
    anonymous_count = 0

    # Split the query into top-level elements, remembering where each one starts.
    parts: list[tuple[int, str]] = []

    def add_part(begin: int, end: int) -> None:
        raw = query[begin:end]
        parts.append((begin + len(raw) - len(raw.lstrip()), raw.strip()))

    bracket_level = 0
    part_begin = 0
    for i, char in enumerate(query):
        if char == "(":
            bracket_level += 1
        elif char == ")":
            bracket_level -= 1
            if bracket_level < 0:
                snippet = _get_snippet(query, i)
                raise ValueError(f"Unbalanced parentheses near: `{snippet}`.")
        elif char == "," and bracket_level == 0:
            add_part(part_begin, i)
            part_begin = i + 1
    add_part(part_begin, len(query))
    if bracket_level != 0:
        snippet = _get_snippet(query, len(query) - 1)
        raise ValueError(f"Unbalanced parentheses near: `{snippet}`.")

    node_pattern = re.compile(r"^\(([^)]+)\)$")
    rel_pattern = re.compile(r"^\(([^)]+)\)-\s*\[\s*:(\w+)\s*(\{[^}]*\})?\s*\]->\(([^)]+)\)$")

    for part_start, part in parts:
        node_m = node_pattern.match(part)
        if node_m:
            node_text, node_start = stripped_group(node_m, 1, part_start)
            alias, props = parse_labels_and_props(node_text, node_start)
            if not alias:
                alias = f"_anon_{anonymous_count}"
                anonymous_count += 1
            if alias not in alias_to_id:
                alias_to_id[alias] = str(uuid.uuid4())
            # NOTE(review): a repeated alias reuses the existing id, yet a further Node is still
            # appended - confirm this is the intended handling of duplicate aliases.
            nodes.append(Node(id=alias_to_id[alias], properties=props))
            continue

        rel_m = rel_pattern.match(part)
        if not rel_m:
            snippet = part[:30]
            raise ValueError(f"Invalid element in CREATE near: `{snippet}`.")

        # Resolve the source (group 1) and then the target (group 4) of the relationship.
        endpoint_ids = []
        for group in (1, 4):
            endpoint_text, endpoint_start = stripped_group(rel_m, group, part_start)
            endpoint_alias, _ = parse_labels_and_props(endpoint_text, endpoint_start)
            if not endpoint_alias or endpoint_alias not in alias_to_id:
                snippet = _get_snippet(query, endpoint_start)
                raise ValueError(
                    f"Relationship references unknown node alias: '{endpoint_alias}' near: `{snippet}`."
                )
            endpoint_ids.append(alias_to_id[endpoint_alias])

        rel_id = str(uuid.uuid4())
        rel_props: dict[str, Any] = {"__type": rel_m.group(2)}
        rel_props_str = rel_m.group(3) or ""
        if rel_props_str:
            inner_str = rel_props_str.strip("{}").strip()
            # Locate the properties inside this relationship rather than at their first
            # occurrence anywhere in the query, so error snippets point at the right place.
            prop_start = part_start + rel_m.start(3) + rel_props_str.find(inner_str)
            parse_prop_str(inner_str, prop_start, rel_props)

        relationships.append(
            Relationship(
                id=rel_id,
                source=endpoint_ids[0],
                target=endpoint_ids[1],
                properties=rel_props,
            )
        )

    return VisualizationGraph(nodes=nodes, relationships=relationships)
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import pytest
2+
3+
from neo4j_viz.gql_create import from_gql_create
4+
5+
6+
def test_from_gql_create() -> None:
    """Parse a query mixing nodes, labels, nested properties and relationships, and check the resulting graph."""
    query = """
CREATE
(a:User {name: 'Alice', age: 23}),
(b:User:person {name: "Bridget", age: 34}),
(wizardMan:User {name: 'Charles: The wizard, man', hello: true, height: NULL}),
(d:User),

(a)-[:LINK {weight: 0.5}]->(b),

(e:User {age: 67, my_map: {key: 'value', key2: 3.14, key3: [1, 2, 3], key4: {a: 1, b: null}}}),
(:User {age: 42, pets: ['cat', false, 'dog']}),

(f:User&Person


{name: 'Fawad', age: 78}),

(a)-[:LINK {weight: 4}]->(wizardMan),
(e)-[:LINK]->(d),
(e)-[:OTHER_LINK {weight: -2}]->(f);
"""
    # Nodes are expected in the order in which they appear in the query.
    expected_node_dicts = [
        {"properties": {"name": "Alice", "age": 23, "__labels": ["User"]}},
        {"properties": {"name": "Bridget", "age": 34, "__labels": ["User", "person"]}},
        {"properties": {"name": "Charles: The wizard, man", "hello": True, "height": None, "__labels": ["User"]}},
        {"properties": {"__labels": ["User"]}},
        {
            "properties": {
                "age": 67,
                "my_map": {"key": "value", "key2": 3.14, "key3": [1, 2, 3], "key4": {"a": 1, "b": None}},
                "__labels": ["User"],
            }
        },
        {"properties": {"age": 42, "pets": ["cat", False, "dog"], "__labels": ["User"]}},
        {"properties": {"name": "Fawad", "age": 78, "__labels": ["Person", "User"]}},
    ]
    # Relationship endpoints are given as indices into the node list above.
    expected_relationships_dicts = [
        {"source_idx": 0, "target_idx": 1, "properties": {"weight": 0.5, "__type": "LINK"}},
        {"source_idx": 0, "target_idx": 2, "properties": {"weight": 4, "__type": "LINK"}},
        {"source_idx": 4, "target_idx": 3, "properties": {"__type": "LINK"}},
        {"source_idx": 4, "target_idx": 6, "properties": {"weight": -2, "__type": "OTHER_LINK"}},
    ]

    graph = from_gql_create(query)

    assert len(graph.nodes) == len(expected_node_dicts)
    for created_node, expected_node in zip(graph.nodes, expected_node_dicts):
        assert created_node.properties == expected_node["properties"]

    assert len(graph.relationships) == len(expected_relationships_dicts)
    for created_rel, expected_rel in zip(graph.relationships, expected_relationships_dicts):
        source_node = graph.nodes[expected_rel["source_idx"]]
        assert created_rel.source == source_node.id
        target_node = graph.nodes[expected_rel["target_idx"]]
        assert created_rel.target == target_node.id
        assert created_rel.properties == expected_rel["properties"]
65+
66+
67+
def test_unbalanced_parentheses_snippet() -> None:
    """An unclosed parenthesis is reported together with a snippet of the query."""
    expected_message = r"Unbalanced parentheses near: `.*\(b:User.*"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a:User, (b:User })")
71+
72+
73+
def test_node_property_syntax_error_snippet1() -> None:
    """A node property without a ':' separator raises a property syntax error with a snippet."""
    expected_message = r"Property syntax error near: `.*x, y.*"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a:User {x, y:4})")
77+
78+
79+
def test_node_property_syntax_error_snippet2() -> None:
    """An empty property between two consecutive commas raises a property syntax error with a snippet."""
    expected_message = r"Property syntax error near: `.*x:5,, y.*"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a:User {x:5,, y:4})")
83+
84+
85+
def test_invalid_element_in_create_snippet() -> None:
    """An element that is neither a node nor a relationship pattern is rejected with a snippet."""
    expected_message = r"Invalid element in CREATE near: `\[not_a_node.*"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE [not_a_node]")
89+
90+
91+
def test_rel_property_syntax_error_snippet() -> None:
    """A relationship property without a ':' separator raises a property syntax error with a snippet."""
    expected_message = r"Property syntax error near: `\), \(a\)-\[:LINK {weight0.5}\]->\(b`."
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a:User), (b:User), (a)-[:LINK {weight0.5}]->(b)")
95+
96+
97+
def test_unknown_node_alias() -> None:
    """A relationship whose source alias was never created is rejected before its properties are parsed."""
    expected_message = r"Relationship references unknown node alias: 'a' near: `\(a\)-\[:LINK {weig`"
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("CREATE (a)-[:LINK {weight0.5}]->(b)")
103+
104+
105+
def test_no_create_keyword() -> None:
    """A query that does not start with the CREATE keyword is rejected."""
    expected_message = r"Query must begin with 'CREATE' \(case insensitive\)."
    with pytest.raises(ValueError, match=expected_message):
        from_gql_create("(a:User {y:4})")

0 commit comments

Comments
 (0)