Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 53 additions & 11 deletions jsonquerylang/functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from functools import reduce, cmp_to_key
from math import prod
import re
import regex


def get_functions(compile, build_function):
Expand Down Expand Up @@ -282,14 +282,54 @@ def fn_not_in(path, not_in_values):
)

def fn_regex(path, expression, options=None):
    """Build a predicate that tests whether a value contains a regex match.

    Args:
        path: Query handed to ``compile`` to obtain a getter for the text
            that is tested.
        expression: Regular expression pattern; compiled once, up front.
        options: Optional string of flag letters handed to
            ``_parse_regex_flags``. When falsy (``None`` or ``""``) the
            pattern is compiled without flags.

    Returns:
        A function that takes the input value and returns ``True`` when the
        pattern is found anywhere in the extracted text, ``False`` otherwise.
    """
    # Only consult _parse_regex_flags for a non-empty options string.
    if options:
        compiled_regex = regex.compile(
            expression, flags=_parse_regex_flags(options)
        )
    else:
        compiled_regex = regex.compile(expression)

    # NOTE(review): `compile` is presumably the callback received by the
    # enclosing get_functions(compile, build_function), not the builtin.
    getter = compile(path)

    # search() instead of match(): match() only succeeds at position 0, so an
    # unanchored pattern such as "like|awesome" would never be found in the
    # middle of a text. search() agrees with fn_match / fn_match_all, and
    # explicitly anchored patterns ("^...$") behave exactly as before.
    return lambda value: compiled_regex.search(getter(value)) is not None

def match_to_json(result):
    """Convert a regex match object into a plain JSON-compatible dict.

    Args:
        result: A match object exposing ``group()``, ``groups()`` and
            ``groupdict()``.

    Returns:
        A dict that always has ``"value"`` (the full matched text). It also
        has ``"groups"`` (list of captured groups) when the pattern has
        capture groups, and ``"namedGroups"`` (dict of named groups) when the
        pattern has named groups.
    """
    payload = {"value": result.group()}
    captured = list(result.groups())
    named = result.groupdict()

    # "groups" is emitted whenever there is anything captured; insertion order
    # (value, groups, namedGroups) is kept stable for serialisation.
    if named or captured:
        payload["groups"] = captured

    if named:
        payload["namedGroups"] = named

    return payload

def fn_match(path, expression, options=None):
    """Build a function that extracts the first regex match from a text.

    Args:
        path: Query handed to ``compile`` to obtain a getter for the text
            that is searched.
        expression: Regular expression pattern; compiled once, up front.
        options: Optional string of flag letters handed to
            ``_parse_regex_flags``. When falsy the pattern is compiled
            without flags.

    Returns:
        A function that takes the input value and returns the first match
        converted by ``match_to_json``, or ``None`` when nothing matches.
    """
    if options:
        pattern = regex.compile(expression, flags=_parse_regex_flags(options))
    else:
        pattern = regex.compile(expression)

    # NOTE(review): `compile` is presumably the callback received by the
    # enclosing get_functions(compile, build_function), not the builtin.
    read_text = compile(path)

    def find_first(value):
        found = pattern.search(read_text(value))
        if found:
            return match_to_json(found)
        return None

    return find_first

def fn_match_all(path, expression, options=None):
    """Build a function that extracts every regex match from a text.

    Args:
        path: Query handed to ``compile`` to obtain a getter for the text
            that is searched.
        expression: Regular expression pattern; compiled once, up front.
        options: Optional string of flag letters handed to
            ``_parse_regex_flags``. When falsy the pattern is compiled
            without flags.

    Returns:
        A function that takes the input value and returns a list with one
        ``match_to_json`` result per match, in the order ``finditer`` yields
        them; the list is empty when nothing matches.
    """
    if options:
        pattern = regex.compile(expression, flags=_parse_regex_flags(options))
    else:
        pattern = regex.compile(expression)

    # NOTE(review): `compile` is presumably the callback received by the
    # enclosing get_functions(compile, build_function), not the builtin.
    read_text = compile(path)

    def find_all(value):
        matches = pattern.finditer(read_text(value))
        return [match_to_json(found) for found in matches]

    return find_all

def eq(a, b):
    """Strict equality: the values compare equal AND have the same exact type.

    The type check keeps values that Python considers equal across types
    (for example ``1 == 1.0`` or ``True == 1``) from being treated as equal.
    """
    same_value = a == b
    return same_value and type(a) is type(b)
Expand Down Expand Up @@ -360,6 +400,8 @@ def lt(a, b):
"in": fn_in,
"not in": fn_not_in,
"regex": fn_regex,
"match": fn_match,
"matchAll": fn_match_all,
"eq": fn_eq,
"gt": fn_gt,
"gte": fn_gte,
Expand All @@ -384,12 +426,12 @@ def _parse_regex_flags(flags):
return None

all_flags = {
"A": re.A,
"I": re.I,
"M": re.M,
"S": re.S,
"X": re.X,
"L": re.L,
"A": regex.A,
"I": regex.I,
"M": regex.M,
"S": regex.S,
"X": regex.X,
"L": regex.L,
}

first, *rest = flags.upper()
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Pygments==2.19.1
pyproject_hooks==1.2.0
pywin32-ctypes==0.2.3
readme_renderer==44.0
regex==2025.11.3
requests==2.32.3
requests-toolbelt==1.0.0
rfc3986==2.0.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@
"Operating System :: OS Independent",
],
include_package_data=True,
install_requires=[],
install_requires=["regex"],
)
149 changes: 149 additions & 0 deletions tests/test-suite/compile.test.json
Original file line number Diff line number Diff line change
Expand Up @@ -2011,6 +2011,155 @@
"input": null,
"query": ["regex", "Joe", "^[A-z]+$"],
"output": true
},
{
"input": null,
"query": ["regex", "2025", "^[A-z]+$"],
"output": false
}
]
},
{
"category": "match",
"description": "should extract a regular expression match from a string",
"tests": [
{
"input": null,
"query": ["match", "Hello World!", "[A-z]+"],
"output": { "value": "Hello" }
},
{
"input": null,
"query": ["match", "2025-11-05", "[A-z]+"],
"output": null
},
{
"input": null,
"query": ["match", "Hello World!", "([A-Z])([a-z]+)"],
"output": {
"value": "Hello",
"groups": ["H", "ello"]
}
}
]
},
{
"category": "match",
"description": "should extract a regular expression match with groups from a string",
"tests": [
{
"input": null,
"query": [
"match",
"I'm on holiday from 2025-07-18 till 2025-08-01",
"(?<year>\\d{4})-(?<month>\\d{2})-(?<date>\\d{2})"
],
"output": {
"value": "2025-07-18",
"groups": ["2025", "07", "18"],
"namedGroups": { "year": "2025", "month": "07", "date": "18" }
}
},
{
"input": null,
"query": ["match", "Hello World!", "(?<year>\\d{4})-(?<month>\\d{2})-(?<date>\\d{2})"],
"output": null
}
]
},
{
"category": "match",
"description": "should extract a regular expression match with flags from a string",
"tests": [
{
"input": null,
"query": ["match", "Hello World!", "world", ""],
"output": null
},
{
"input": null,
"query": ["match", "Hello World!", "world!", "i"],
"output": { "value": "World!" }
},
{
"input": null,
"query": ["match", "Hello World!", "(?<group1>world)!", "i"],
"output": {
"value": "World!",
"groups": ["World"],
"namedGroups": { "group1": "World" }
}
}
]
},
{
"category": "matchAll",
"description": "should extract all regular expression matches from a string",
"tests": [
{
"input": null,
"query": ["matchAll", "Hello World!", "[A-z]+"],
"output": [{ "value": "Hello" }, { "value": "World" }]
},
{
"input": null,
"query": ["matchAll", "2025-05-11", "[A-z]+"],
"output": []
},
{
"input": null,
"query": ["matchAll", "Hello World!", "([A-Z])([a-z]+)"],
"output": [
{ "value": "Hello", "groups": ["H", "ello"] },
{ "value": "World", "groups": ["W", "orld"] }
]
}
]
},
{
"category": "matchAll",
"description": "should extract all regular expression matches with groups from a string",
"tests": [
{
"input": null,
"query": [
"matchAll",
"I'm on holiday from 2025-07-18 till 2025-08-01",
"(?<year>\\d{4})-(?<month>\\d{2})-(?<date>\\d{2})"
],
"output": [
{
"value": "2025-07-18",
"groups": ["2025", "07", "18"],
"namedGroups": { "year": "2025", "month": "07", "date": "18" }
},
{
"value": "2025-08-01",
"groups": ["2025", "08", "01"],
"namedGroups": { "year": "2025", "month": "08", "date": "01" }
}
]
},
{
"input": null,
"query": ["matchAll", "Hello World!", "(?<year>\\d{4})-(?<month>\\d{2})-(?<date>\\d{2})"],
"output": []
}
]
},
{
"category": "matchAll",
"description": "should extract all regular expression matches with a flag from a string",
"tests": [
{
"input": null,
"query": ["matchAll", "Hello World!", "\\b[a-z]+\\b", ""],
"output": []
},
{
"input": null,
"query": ["matchAll", "Hello World!", "\\b[a-z]+\\b", "i"],
"output": [{ "value": "Hello" }, { "value": "World" }]
}
]
},
Expand Down