Automa is a regex-to-Julia compiler.
By compiling regex to Julia code in the form of Expr
objects,
Automa provides facilities to create efficient and robust regex-based lexers, tokenizers and parsers using Julia's metaprogramming capabilities.
You can view Automa as a regex engine that can insert arbitrary Julia code into its input matching process, which will be executed when certain parts of the regex matches an input.
Automa is designed to generate very efficient code to scan large text data, often much faster than handcrafted code.
For more information read the documentation, or read the examples below and in the examples/
directory in this repository.
using Automa
generate_buffer_validator(:validate_alphanumeric, re"[a-zA-Z0-9]*") |> eval
for s in ["abc", "aU81m", "!,>"]
println("$s is alphanumeric? $(isnothing(validate_alphanumeric(s)))")
end
using Automa
tokens = [
:identifier => re"[A-Za-z_][0-9A-Za-z_!]*",
:lparens => re"\(",
:rparens => re"\)",
:comma => re",",
:quot => re"\"",
:space => re"[\t\f ]+",
];
@eval @enum Token errortoken $(first.(tokens)...)
make_tokenizer((errortoken,
[Token(i) => j for (i,j) in enumerate(last.(tokens))]
)) |> eval
collect(tokenize(Token, """(alpha, "beta15")"""))
using Automa
machine = let
name = onexit!(onenter!(re"[^\t\r\n]+", :mark), :name)
field = onexit!(onenter!(re"[^\t\r\n]+", :mark), :field)
nameline = name * rep('\t' * name)
record = onexit!(field * rep('\t' * field), :record)
compile(nameline * re"\r?\n" * record * rep(re"\r?\n" * record) * rep(re"\r?\n"))
end
actions = Dict(
:mark => :(pos = p),
:name => :(push!(headers, String(data[pos:p-1]))),
:field => quote
n_fields += 1
push!(fields, String(data[pos:p-1]))
end,
:record => quote
n_fields == length(headers) || error("Malformed TSV")
n_fields = 0
end
)
@eval function parse_tsv(data)
headers = String[]
fields = String[]
pos = n_fields = 0
$(generate_code(machine, actions))
(headers, reshape(fields, length(headers), :))
end
header, data = parse_tsv("a\tabc\n12\t13\r\nxyc\tz\n\n")