Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support for minimum/maximum in JSON schemas #1019

Open
mmoskal opened this issue Sep 10, 2024 · 0 comments · May be fixed by #1023
Open

support for minimum/maximum in JSON schemas #1019

mmoskal opened this issue Sep 10, 2024 · 0 comments · May be fixed by #1023
Assignees

Comments

@mmoskal
Copy link
Contributor

mmoskal commented Sep 10, 2024

Currently we don't support the numerical bounds for JSON numbers. I wrote some code that compiles the bounds into regular expressions.

I don't have time right now to make it into full PR right now - maybe someone else wants to take a look.

There are two functions in the file, one for integer and one for real (float) ranges. It only supports inclusive ranges right now (not a problem for integers, but for floats would need some more work to support exclusive).

import re
import math


def mk_or(parts: list[str]) -> str:
    if len(parts) == 1:
        return parts[0]
    return "(" + "|".join(parts) + ")"


def num_digits(n: int) -> int:
    return len(str(n))


def rx_int_range(left: int, right: int) -> str:
    assert left <= right
    if left < 0:
        if right < 0:
            return "(-" + rx_int_range(-right, -left) + ")"
        else:
            return "(-" + rx_int_range(0, -left) + "|" + rx_int_range(0, right) + ")"
    else:
        if num_digits(left) == num_digits(right):
            l = str(left)
            r = str(right)
            if left == right:
                return f"({l})"

            lpref = l[:-1]
            lx = l[-1]
            rpref = r[:-1]
            rx = r[-1]

            # 1723-1728 => 172[3-8]
            if lpref == rpref:
                return f"({lpref}[{lx}-{rx}])"

            # general case 723-915 => 72[3-9]|(73-90)[0-9]|91[0-5]
            left_rec = int(lpref)
            right_rec = int(rpref)
            assert left_rec < right_rec
            parts = []

            # optimizations:
            # for 0, we have 720-915 => (72-90)[0-9]|91[0-5]
            if lx != "0":
                left_rec += 1
                parts.append(f"{lpref}[{lx}-9]")
            # for 9 we have 723-919 => 72[3-9]|(73-91)[0-9]
            if rx != "9":
                right_rec -= 1
                parts.append(f"{rpref}[0-{rx}]")

            # the middle can be empty 723-734 => 72[3-9]|73[0-4]
            if left_rec <= right_rec:
                inner = rx_int_range(left_rec, right_rec)
                parts.append(f"{inner}[0-9]")

            return mk_or(parts)
        else:
            break_point = 10 ** num_digits(left) - 1
            return mk_or(
                [rx_int_range(left, break_point), rx_int_range(break_point + 1, right)]
            )


def lexi_x_to_9(x: str) -> str:
    if x == "":
        return "[0-9]*"
    if len(x) == 1:
        return f"[{x}-9][0-9]*"
    x0 = int(x[0])
    parts = [x[0] + lexi_x_to_9(x[1:])]
    if x0 < 9:
        parts.append(f"[{x0 + 1}-9][0-9]*")
    return mk_or(parts)


def lexi_0_to_x(x: str) -> str:
    if x == "":
        return ""  # don't allow trailing zeros
    x0 = int(x[0])
    parts = [x[0] + lexi_0_to_x(x[1:])]
    if x0 > 0:
        parts.append(f"[0-{x0 - 1}][0-9]*")
    return mk_or(parts)


def lexi_range(ld: str, rd: str) -> str:
    assert len(ld) == len(rd)
    if ld == rd:
        return ld
    l0 = int(ld[0])
    r0 = int(rd[0])
    # common prefix: 137-144 => 1(37-44)
    if r0 == l0:
        return ld[0] + lexi_range(ld[1:], rd[1:])
    assert l0 < r0
    # 23470-82142 => 2(347-999)|[3-7][0-9]*|8(0000-2142)
    parts = [ld[0] + lexi_x_to_9(ld[1:].rstrip("0"))]
    # is the [3-7][0-9]* part empty?
    if l0 + 1 < r0:
        parts.append(f"[{l0 + 1}-{r0 - 1}][0-9]*")
    parts.append(rd[0] + lexi_0_to_x(rd[1:].rstrip("0")))
    return mk_or(parts)

def float_to_str(f: float) -> str:
    s = f"{f:f}"
    return s.rstrip("0").rstrip(".")

def rx_float_range(left: float, right: float) -> str:
    assert left <= right
    if left < 0:
        if right < 0:
            return "(-" + rx_float_range(-right, -left) + ")"
        else:
            return (
                "(-"
                + rx_float_range(0.0, -left)
                + "|"
                + rx_float_range(0.0, right)
                + ")"
            )
    else:
        l = float_to_str(left)
        r = float_to_str(right)
        if left == right:
            return f"({l})"

        if "e" in l or "e" in r:
            raise ValueError("Scientific notation not supported")
        if not math.isfinite(left) or not math.isfinite(right):
            raise ValueError("Infinite numbers not supported")

        left_rec = int(l.split(".")[0])
        right_rec = int(r.split(".")[0])

        ld = (l.split(".") + [""])[1]
        rd = (r.split(".") + [""])[1]

        # 17.123-17.1448 -> 17.((1230-1447)[0-9]*|1448)
        if left_rec == right_rec:
            while len(ld) < len(rd):
                ld += "0"
            while len(rd) < len(ld):
                rd += "0"
            suff = "\\." + lexi_range(ld, rd)
            if int(ld) == 0:
                return f"({left_rec}({suff})?)"
            else:
                return f"({left_rec}{suff})"

        parts = []

        # 7.321-22.123 -> 7.(321-999)|8-21(.[0-9]+)?|22.(000-123)
        if ld:
            parts.append(f"({left_rec}\\.{lexi_x_to_9(ld)})")
            left_rec += 1

        if right_rec - 1 >= left_rec:
            inner = rx_int_range(left_rec, right_rec - 1)
            parts.append(f"({inner}(\\.[0-9]+)?)")

        if rd:
            parts.append(f"({right_rec}(\\.{lexi_0_to_x(rd)})?)")
        else:
            parts.append(f"{right_rec}")

        return mk_or(parts)


def do_test_int_range(rx: str, left: int, right: int) -> None:
    print(rx)
    n = left - 1000
    while n < right + 1000:
        m = re.fullmatch(rx, str(n)) is not None
        f = left <= n <= right
        if m != f:
            print(f"Failed for {n} match={m} expected={f}")
            assert False
        n += 1


def test_int_range(left: int, right: int) -> None:
    print(f"Testing range {left}-{right}")
    rx = rx_int_range(left, right)
    do_test_int_range(rx, left, right)


def test_float_range(left: float, right: float) -> None:
    print(f"Testing range {left}-{right}")
    rx = rx_float_range(left, right)
    do_test_int_range(rx, math.ceil(left), math.floor(right))
    eps = 0.000001
    for x in [left, right, 0, int(left), int(right)]:
        for off in [0, -eps, eps, 1, -1]:
            n = x + off
            ns = float_to_str(n)
            m = re.fullmatch(rx, ns) is not None
            f = left <= n <= right
            if m != f:
                print(f"Failed float for {ns} match={m} expected={f}")
                assert False


def main():
    test_int_range(0, 9)
    test_int_range(1, 7)
    test_int_range(0, 99)
    test_int_range(13, 170)
    test_int_range(13, 17)
    test_int_range(13, 27)
    test_int_range(13, 57)
    test_int_range(72, 91)
    test_int_range(723, 915)
    test_int_range(23, 915)
    test_int_range(-1, 915)
    test_int_range(-9, 9)
    test_int_range(-3, 3)
    test_int_range(-3, 0)
    test_int_range(-72, 13)

    test_float_range(0, 10)
    test_float_range(-10, 0)

    test_float_range(0.5, 0.72)
    test_float_range(0.5, 1.72)
    test_float_range(0.5, 1.32)
    test_float_range(0.3245, 0.325)
    test_float_range(1, 2.34)
    test_float_range(1.33, 2)
    test_float_range(1, 10.34)
    test_float_range(1.33, 10)
    test_float_range(-1.33, 10)
    test_float_range(-17.23, -1.33)
    test_float_range(-1.23, -1.221)
    test_float_range(-10.2, 45293.9)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
2 participants