Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A different kind of json #198

Merged
merged 3 commits into from
Jan 19, 2020
Merged

A different kind of json #198

merged 3 commits into from
Jan 19, 2020

Conversation

katef
Copy link
Owner

@katef katef commented Jan 17, 2020

Replacement of the libfsm json output with json that I hope is a little more convenient.

Typically it looks like this, with output for human-readable labels:

; bmake -r CC=clang DEBUG=1 && ./build/bin/re -pl json '[abc][^x]'
{
  "statecount": 3,
  "start": 0,
  "end": [ 2 ],
  "edges": [ 
    { "src": 0, "dst": 0, "label": "[^abc]" },
    { "src": 0, "dst": 1, "label": "[abc]" },
    { "src": 1, "dst": 2, "label": "[^x]" },
    { "src": 1, "dst": 0, "label": "x" },
    { "src": 2, "dst": 2, "label": "/./" }
  ]
}

and similarly as an NFA:

; bmake -r CC=clang DEBUG=1 && ./build/bin/re -npl json '[abc][^x]'
{
  "statecount": 9,
  "start": 0,
  "end": [ 1 ],
  "edges": [ 
    { "src": 0, "dst": 2, "label": "\u03B5" },
    { "src": 2, "dst": 2, "label": "/./" },
    { "src": 2, "dst": 4, "label": "[abc]" },
    { "src": 3, "dst": 3, "label": "/./" }, 
    { "src": 3, "dst": 1, "label": "\u03B5" },
    { "src": 4, "dst": 5, "label": "\u03B5" },
    { "src": 5, "dst": 6, "label": "\u03B5" },
    { "src": 6, "dst": 7, "label": "[^x]" },
    { "src": 6, "dst": 8, "label": "x" },
    { "src": 7, "dst": 3, "label": "\u03B5" }
  ]
}

With the .consolidate_edges option disabled, edges are given independently and this is visible with a symbol attribute (as opposed to label, which is intended for human consumption):

; bmake -r CC=clang DEBUG=1 && ./build/bin/re -cbpl json 'a|b*c?e|d+'
{
  "statecount": 5,
  "start": 0, 
  "end": [ 1, 4 ],
  "edges": [ 
    { "src": 0, "dst": 1, "symbol": "a" },
    { "src": 0, "dst": 2, "symbol": "b" },
    { "src": 0, "dst": 3, "symbol": "c" },
    { "src": 0, "dst": 4, "symbol": "d" },
    { "src": 0, "dst": 1, "symbol": "e" },
    { "src": 2, "dst": 2, "symbol": "b" },
    { "src": 2, "dst": 3, "symbol": "c" },
    { "src": 2, "dst": 1, "symbol": "e" },
    { "src": 3, "dst": 1, "symbol": "e" },
    { "src": 4, "dst": 4, "symbol": "d" }
  ]
}

And with numeric output for byte values (intended for machine consumption):

; bmake -r CC=clang DEBUG=1 && ./build/bin/re -Xcbpl json 'abc'
{
  "statecount": 4,
  "start": 0,
  "end": [ 3 ],
  "edges": [
    { "src": 0, "dst": 1, "symbol": 97 },
    { "src": 1, "dst": 2, "symbol": 98 },
    { "src": 2, "dst": 3, "symbol": 99 }
  ]
}

I didn't write a json schema for this, sorry. I should provide one, I know.

I did experiment with using dagre for node placement.
Here's Graphviz's rendering for the above anchored /a|b*c?e|d+/ regexp:

image

And the same FSM with a quick mock-up page rendering that using dagre-D3:

image

I had wanted to use this json to model using dagre's algorithm for node placement,
in combination with dot(1)'s algorithm for edges. Unfortunately Graphviz will only
heed pre-defined coordinates when rendering using neato, which produces straight
lines for edges, which is of course not what I had hoped for:

image

Regardless, I wrote a nodejs script to read in this new json, and to format it out
to .dot format with the coordinates populated:

#!/usr/bin/env nodejs

var dagre = require("dagre");

const fs = require("fs");
// Read the entire JSON document from file descriptor 0 (stdin) and parse it.
const data = JSON.parse(fs.readFileSync(0, "utf-8"));

/**
 * Build a directed multigraph from a parsed FSM description.
 *
 * The graph gets one node per state (ids 0 .. statecount-1), plus an extra
 * node named "start" with a single edge to the FSM's start state. Every
 * entry in json.edges becomes an edge named after its index in that array,
 * so that parallel edges between the same pair of states stay distinct.
 *
 * @param graphlib  object providing a Graph constructor (e.g. dagre.graphlib)
 * @param json      parsed FSM: { statecount, start, edges: [ { src, dst,
 *                  label | symbol } ] }
 * @returns the populated graph
 */
function json_read(graphlib, json) {
    const g = new graphlib.Graph({
        directed: true,
        multigraph: true,
        compound: false,
    });

    g.setGraph({ rankdir: "lr", nodesep: 30, edgesep: 30 });

    g.setNode("start", { label: "" });

    for (let state = 0; state < json.statecount; state++) {
        g.setNode(state, { label: "" });
    }

    json.edges.forEach((entry, index) => {
        // Edges carry "label" when consolidated and "symbol" otherwise;
        // fall back to the symbol so the edge is never left unlabelled.
        const label = entry.label !== undefined ? entry.label : entry.symbol;

        g.setEdge({ v: entry.src, w: entry.dst, name: index }, { label: label });
    });

    // Use the json argument here, not the caller's global, so the function
    // works for whatever FSM description it is handed.
    g.setEdge({ v: "start", w: json.start }, { label: "" });

    return g;
}

// Build the graph from the parsed JSON, then have dagre assign x/y
// coordinates to each node.
const g = json_read(dagre.graphlib, data);

dagre.layout(g);

// Escape the characters that act as markup inside a Graphviz HTML-like
// label (<...>), so that a label such as "[^<]" cannot produce invalid DOT.
// NOTE(review): assumes labels arrive as plain text, not already
// entity-encoded — confirm against the JSON producer.
function html_escape(text) {
    return String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
}

[
    "digraph G {",
    "\trankdir = LR;",
    "\tnode [ shape = circle ];",
    "\tedge [ weight = 2 ];",
    '\tnode [ label = "", width = 0.3 ];',
    // needed for pos="" XXX: but i want to use dot's edges!
    "\tlayout = neato;",
    // NOTE(review): the start node is emitted below as "Sstart", so "start"
    // does not name it — confirm whether this attribute is intended.
    "\troot = start;",
    "",
    '\tSstart [ shape = none, label = "" ];',
    "",
].forEach((line) => console.log(line));

// mapping from dagre's coordinates to graphviz's
const xscale = 0.03;
const yscale = 0.02;

g.nodes().forEach((v) => {
    const n = g.node(v);

    // The "start" node parses to NaN and therefore never matches an end state.
    if (data.end.indexOf(Number.parseInt(v, 10)) !== -1) {
        console.log(`\tS${v} [ shape = "doublecircle" ];`);
    }

    // The trailing "!" pins the node at the given position.
    console.log(`\tS${v} [ pos = "${n.x * xscale},${n.y * yscale}!" ];`);
});
console.log("");

g.edges().forEach((e) => {
    const q = g.edge(e);
    console.log(`\tS${e.v} -> S${e.w} [ label = <${html_escape(q.label)}> ];`);
});

console.log("}");
console.log("");

which produces output like:

; ~/gh/libfsm-pristine/build/bin/re -pl json 'ab?c|de+x?' | ~/gh/libfsm-pristine/dagre/w2.js
digraph G {
        rankdir = LR;
        node [ shape = circle ];
        edge [ weight = 2 ];
        node [ label = "", width = 0.3 ];
        layout = neato;
        root = start;

        Sstart [ shape = none, label = "" ];

        S0 [ pos = "1.5,3!" ];
        S1 [ pos = "3,0!" ];
        S2 [ pos = "6,1.85!" ];
        S3 [ shape = "doublecircle" ];
        S3 [ pos = "7.5,0.35000000000000003!" ];
        S4 [ pos = "4.5,0.9!" ];
        Sstart [ pos = "0,3!" ];

        S0 -> S0 [ label = <[^ad]> ];
        S0 -> S1 [ label = <a> ];
        S0 -> S2 [ label = <d> ];
        S1 -> S0 [ label = <[^a-d]> ];
        S1 -> S1 [ label = <a> ];
        S1 -> S4 [ label = <b> ];
        S1 -> S3 [ label = <c> ];
        S1 -> S2 [ label = <d> ];
        S2 -> S0 [ label = <[^ade]> ];
        S2 -> S1 [ label = <a> ];
        S2 -> S2 [ label = <d> ];
        S2 -> S3 [ label = <e> ];
        S3 -> S3 [ label = </./> ];
        S4 -> S0 [ label = <[^acd]> ];
        S4 -> S1 [ label = <a> ];
        S4 -> S3 [ label = <c> ];
        S4 -> S2 [ label = <d> ];
        Sstart -> S0 [ label = <> ];
}

katef added 3 commits January 16, 2020 16:53
This aims to please both human and machine consumption; when `.consolidate_edges` is set, human-readable labels are produced. Otherwise, edges have verbatim symbol attributes.
@katef katef merged commit e6bef24 into master Jan 19, 2020
@katef katef deleted the kate/different-json branch January 19, 2020 00:56
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant