-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
saw-lexer: Add a simple pygments lexer for SAWScript.
This is (mostly) based on the SAWScript Alex lexer; because pygments is quite a bit more simplistic, we have to be more careful when defining its states. I did not test this beyond the context of inline SAWScript in the documentation, and don't really plan on testing further at this time. As long as we have a passably-functional way to lex SAWScript for documentation, we should be fine. Ideally, this code wouldn't live here - it would be added to pygments. That said, the accompanying pyproject.toml means that, when installed, saw-lexer acts as a seamless extension of pygments; we won't have to do anything special when running Sphinx to tell it about this lexer. Additionally, if/when the lexer is added to the official pygments release, the changes required to the documentation build will be extremely small. Note in particular that the matching for builtins is lackluster, in that it is both liberal (based on basic prefix-matching) and incomplete (I am certain I missed things). I don't think it is anywhere close to our top priority, so for now I'm happy with what I saw in generated (HTML/PDF) documentation.
- Loading branch information
1 parent
a235e05
commit f808628
Showing
2 changed files
with
104 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[build-system] | ||
requires = [ | ||
"setuptools", | ||
] | ||
build-backend = "setuptools.build_meta" | ||
|
||
[project] | ||
name = "saw-lexer" | ||
version = "0.0.1" | ||
dependencies = ["pygments"] | ||
|
||
[project.entry-points."pygments.lexers"] | ||
saw_lexer = "saw_lexer:SAWScriptLexer" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import sys | ||
import re | ||
|
||
|
||
import pygments | ||
from pygments.lexer import RegexLexer, bygroups, using | ||
from pygments.lexers.haskell import CryptolLexer | ||
from pygments.token import ( | ||
Punctuation, | ||
Literal, | ||
Comment, | ||
Operator, | ||
Keyword, | ||
Name, | ||
String, | ||
Number, | ||
Whitespace, | ||
) | ||
|
||
|
||
class SAWScriptLexer(RegexLexer): | ||
name = "SAWScript" | ||
aliases = ["sawscript", "saw-script"] | ||
filenames = ["*.saw"] | ||
tokens = { | ||
"root": [ | ||
# Whitespace-like tokens | ||
(r"\s+", Whitespace), | ||
(r"//.*?$", Comment.Singleline), | ||
(r"/\*", Comment.Multiline, "comment"), | ||
# String literals | ||
(r"\"", String.Double, "string"), | ||
# Embedded Cryptol | ||
(r"\{\{|\{\|", Literal, "cryptol"), | ||
# Symbols | ||
(r"<-|->|==>", Operator.Word), | ||
(r"~|-|\*|\+|/|%|\^|#|>>|>=|>|<<|<=|<|==|!=|&&|&|\|\||\||@", Operator), | ||
(r"=", Operator.Word), | ||
(r",|;|\(|\)|::|:|\[|\]|\{|\}|\.|\\", Punctuation), | ||
# Reserved words (Keywords, types, constants, builtins) | ||
# These require at least a space after (so we don't eat part of some | ||
# other identifier) | ||
( | ||
r"(import|and|let|rec|in|do|if|then|else|as|hiding|typedef)(\s+)", | ||
bygroups(Keyword.Reserved, Whitespace), | ||
), | ||
( | ||
( | ||
r"(CryptolSetup|JavaSetup|LLVMSetup|MIRSetup|ProofScript|" | ||
r"TopLevel|CrucibleSetup|Int|String|Term|Type|Bool|AIG|" | ||
r"CFG|CrucibleMethodSpec|LLVMSpec|JVMMethodSpec|JVMSpec|" | ||
r"MIRSpec)(\s+)" | ||
), | ||
bygroups(Keyword.Type, Whitespace), | ||
), | ||
(r"(true|false|abc|z3)(\s+)", bygroups(Keyword.Constant, Whitespace)), | ||
# N.b. The following is very liberal, but also missing many things. | ||
# There is no centralized list of all builtins/primitives/initial | ||
# basis elements... | ||
( | ||
( | ||
r"((?:assume|external|goal|offline|load|print|prove|read|sat|save|write|llvm|jvm|mir|crucible|w4|sbv|unint)_?\w*|" | ||
r"admit|beta_reduce_goal|enable_experimental|java_load_class|quickcheck|return|simplify|split_goal|trivial|unfolding)(\s+)" | ||
), | ||
bygroups(Name.Builtin, Whitespace), | ||
), | ||
# All other identifiers | ||
(r"[a-zA-Z_][\w']*", Name), | ||
# Number literals | ||
(r"0[bB][0-1]+", Number.Bin), | ||
(r"0[oO][0-7]+", Number.Oct), | ||
(r"0[xX][0-9a-fA-F]", Number.Hex), | ||
(r"\d+", Number.Integer), | ||
], | ||
"comment": [ | ||
(r"[^*/]", Comment.Multiline), | ||
# Allows for arbitrary nesting | ||
(r"/\*", Comment.Multiline, "#push"), | ||
(r"\*/", Comment.Multiline, "#pop"), | ||
(r"[*/]", Comment.Multiline), | ||
], | ||
"string": [ | ||
('[^"]+', String.Double), | ||
('"', String.Double, "#pop"), | ||
], | ||
"cryptol": [ | ||
(r"[^|}]", using(CryptolLexer)), | ||
(r"\|\}|\}\}", Literal, "#pop"), | ||
(r"[|}]", using(CryptolLexer)), | ||
], | ||
} |