Skip to content

Commit

Permalink
saw-lexer: Add a simple pygments lexer for SAWScript.
Browse files Browse the repository at this point in the history
This is (mostly) based on the SAWScript Alex lexer; because pygments
is quite a bit more simplistic, we have to be more careful when
defining its states.

I did not test this beyond the context of inline SAWScript in the
documentation, and don't really plan on testing further at this time.
As long as we have a passably-functional way to lex SAWScript for
documentation, we should be fine.

Ideally, this code wouldn't live here - it would be added to pygments.
That said, the accompanying pyproject.toml means that, when installed,
saw-lexer acts as a seamless extension of pygments; we won't have to
do anything special when running Sphinx to tell it about this lexer.
Additionally, if/when the lexer is added to the official pygments
release, the changes required to the documentation build will be
extremely small.

Note in particular that the matching for builtins is lackluster, in
that it is both liberal (based on basic prefix-matching) and
incomplete (I am certain I missed things).
I don't think it is anywhere close to our top priority, so for now I'm
happy with what I saw in generated (HTML/PDF) documentation.
  • Loading branch information
ChrisEPhifer committed Jan 28, 2025
1 parent a235e05 commit f808628
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 0 deletions.
13 changes: 13 additions & 0 deletions doc/saw-lexer/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[build-system]
requires = [
"setuptools",
]
build-backend = "setuptools.build_meta"

[project]
name = "saw-lexer"
version = "0.0.1"
dependencies = ["pygments"]

[project.entry-points."pygments.lexers"]
saw_lexer = "saw_lexer:SAWScriptLexer"
91 changes: 91 additions & 0 deletions doc/saw-lexer/src/saw_lexer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import sys
import re


import pygments
from pygments.lexer import RegexLexer, bygroups, using
from pygments.lexers.haskell import CryptolLexer
from pygments.token import (
Punctuation,
Literal,
Comment,
Operator,
Keyword,
Name,
String,
Number,
Whitespace,
)


class SAWScriptLexer(RegexLexer):
name = "SAWScript"
aliases = ["sawscript", "saw-script"]
filenames = ["*.saw"]
tokens = {
"root": [
# Whitespace-like tokens
(r"\s+", Whitespace),
(r"//.*?$", Comment.Singleline),
(r"/\*", Comment.Multiline, "comment"),
# String literals
(r"\"", String.Double, "string"),
# Embedded Cryptol
(r"\{\{|\{\|", Literal, "cryptol"),
# Symbols
(r"<-|->|==>", Operator.Word),
(r"~|-|\*|\+|/|%|\^|#|>>|>=|>|<<|<=|<|==|!=|&&|&|\|\||\||@", Operator),
(r"=", Operator.Word),
(r",|;|\(|\)|::|:|\[|\]|\{|\}|\.|\\", Punctuation),
# Reserved words (Keywords, types, constants, builtins)
# These require at least a space after (so we don't eat part of some
# other identifier)
(
r"(import|and|let|rec|in|do|if|then|else|as|hiding|typedef)(\s+)",
bygroups(Keyword.Reserved, Whitespace),
),
(
(
r"(CryptolSetup|JavaSetup|LLVMSetup|MIRSetup|ProofScript|"
r"TopLevel|CrucibleSetup|Int|String|Term|Type|Bool|AIG|"
r"CFG|CrucibleMethodSpec|LLVMSpec|JVMMethodSpec|JVMSpec|"
r"MIRSpec)(\s+)"
),
bygroups(Keyword.Type, Whitespace),
),
(r"(true|false|abc|z3)(\s+)", bygroups(Keyword.Constant, Whitespace)),
# N.b. The following is very liberal, but also missing many things.
# There is no centralized list of all builtins/primitives/initial
# basis elements...
(
(
r"((?:assume|external|goal|offline|load|print|prove|read|sat|save|write|llvm|jvm|mir|crucible|w4|sbv|unint)_?\w*|"
r"admit|beta_reduce_goal|enable_experimental|java_load_class|quickcheck|return|simplify|split_goal|trivial|unfolding)(\s+)"
),
bygroups(Name.Builtin, Whitespace),
),
# All other identifiers
(r"[a-zA-Z_][\w']*", Name),
# Number literals
(r"0[bB][0-1]+", Number.Bin),
(r"0[oO][0-7]+", Number.Oct),
(r"0[xX][0-9a-fA-F]", Number.Hex),
(r"\d+", Number.Integer),
],
"comment": [
(r"[^*/]", Comment.Multiline),
# Allows for arbitrary nesting
(r"/\*", Comment.Multiline, "#push"),
(r"\*/", Comment.Multiline, "#pop"),
(r"[*/]", Comment.Multiline),
],
"string": [
('[^"]+', String.Double),
('"', String.Double, "#pop"),
],
"cryptol": [
(r"[^|}]", using(CryptolLexer)),
(r"\|\}|\}\}", Literal, "#pop"),
(r"[|}]", using(CryptolLexer)),
],
}

0 comments on commit f808628

Please sign in to comment.