Skip to content

Commit 8d0b74b

Browse files
Clojure: Improved tokenization (#3056)
1 parent 148c1ec commit 8d0b74b

8 files changed

+102
-27
lines changed

components/prism-clojure.js

+24-6
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,34 @@
11
// Copied from https://github.com/jeluard/prism-clojure
22
Prism.languages.clojure = {
3-
'comment': /;.*/,
4-
'string': {
5-
pattern: /"(?:[^"\\]|\\.)*"/,
3+
'comment': {
4+
pattern: /;.*/,
65
greedy: true
76
},
8-
'operator': /(?:::|[:|'])\b[a-z][\w*+!?-]*\b/i, //used for symbols and keywords
7+
'string': [
8+
{
9+
pattern: /"(?:[^"\\]|\\.)*"/,
10+
greedy: true
11+
},
12+
// characters
13+
/\\\w+/
14+
],
15+
'symbol': {
16+
pattern: /(^|[\s()\[\]{},])::?[\w*+!?'<>=/.-]+/,
17+
lookbehind: true
18+
},
919
'keyword': {
10-
pattern: /([^\w+*'?-])(?:def|if|do|let|\.\.|quote|var|->>|->|fn|loop|recur|throw|try|monitor-enter|\.|new|set!|def-|defn|defn-|defmacro|defmulti|defmethod|defstruct|defonce|declare|definline|definterface|defprotocol|==|defrecord|>=|deftype|<=|defproject|ns|\*|\+|-|\/|<|=|>|accessor|agent|agent-errors|aget|alength|all-ns|alter|and|append-child|apply|array-map|aset|aset-boolean|aset-byte|aset-char|aset-double|aset-float|aset-int|aset-long|aset-short|assert|assoc|await|await-for|bean|binding|bit-and|bit-not|bit-or|bit-shift-left|bit-shift-right|bit-xor|boolean|branch\?|butlast|byte|cast|char|children|class|clear-agent-errors|comment|commute|comp|comparator|complement|concat|conj|cons|constantly|cond|if-not|construct-proxy|contains\?|count|create-ns|create-struct|cycle|dec|deref|difference|disj|dissoc|distinct|doall|doc|dorun|doseq|dosync|dotimes|doto|double|down|drop|drop-while|edit|end\?|ensure|eval|every\?|false\?|ffirst|file-seq|filter|find|find-doc|find-ns|find-var|first|float|flush|for|fnseq|frest|gensym|get-proxy-class|get|hash-map|hash-set|identical\?|identity|if-let|import|in-ns|inc|index|insert-child|insert-left|insert-right|inspect-table|inspect-tree|instance\?|int|interleave|intersection|into|into-array|iterate|join|key|keys|keyword|keyword\?|last|lazy-cat|lazy-cons|left|lefts|line-seq|list\*|list|load|load-file|locking|long|macroexpand|macroexpand-1|make-array|make-node|map|map-invert|map\?|mapcat|max|max-key|memfn|merge|merge-with|meta|min|min-key|name|namespace|neg\?|newline|next|nil\?|node|not|not-any\?|not-every\?|not=|ns-imports|ns-interns|ns-map|ns-name|ns-publics|ns-refers|ns-resolve|ns-unmap|nth|nthrest|or|parse|partial|path|peek|pop|pos\?|pr|pr-str|print|print-str|println|println-str|prn|prn-str|project|proxy|proxy-mappings|quot|rand|rand-int|range|re-find|re-groups|re-matcher|re-matches|re-pattern|re-seq|read|read-line|reduce|ref|ref-set|refer|rem|remove|remove-method|remove-ns|rename|rename-keys|repeat|replace|replicate|resolve|rest|resultset-seq|reverse|rfirst|right|rights|root|rrest|rseq|second|select|select-keys|send|send-off|seq|seq-zip|seq\?|set|short|slurp|some|sort|sort-by|sorted-map|sorted-map-by|sorted-set|special-symbol\?|split-at|split-with|str|string\?|struct|struct-map|subs|subvec|symbol|symbol\?|sync|take|take-nth|take-while|test|time|to-array|to-array-2d|tree-seq|true\?|union|up|update-proxy|val|vals|var-get|var-set|var\?|vector|vector-zip|vector\?|when|when-first|when-let|when-not|with-local-vars|with-meta|with-open|with-out-str|xml-seq|xml-zip|zero\?|zipmap|zipper)(?=[^\w+*'?-])/,
20+
pattern: /(\()(?:-|->|->>|\.|\.\.|\*|\/|\+|<|<=|=|==|>|>=|accessor|agent|agent-errors|aget|alength|all-ns|alter|and|append-child|apply|array-map|aset|aset-boolean|aset-byte|aset-char|aset-double|aset-float|aset-int|aset-long|aset-short|assert|assoc|await|await-for|bean|binding|bit-and|bit-not|bit-or|bit-shift-left|bit-shift-right|bit-xor|boolean|branch\?|butlast|byte|cast|char|children|class|clear-agent-errors|comment|commute|comp|comparator|complement|concat|cond|conj|cons|constantly|construct-proxy|contains\?|count|create-ns|create-struct|cycle|dec|declare|def|def-|definline|definterface|defmacro|defmethod|defmulti|defn|defn-|defonce|defproject|defprotocol|defrecord|defstruct|deftype|deref|difference|disj|dissoc|distinct|do|doall|doc|dorun|doseq|dosync|dotimes|doto|double|down|drop|drop-while|edit|end\?|ensure|eval|every\?|false\?|ffirst|file-seq|filter|find|find-doc|find-ns|find-var|first|float|flush|fn|fnseq|for|frest|gensym|get|get-proxy-class|hash-map|hash-set|identical\?|identity|if|if-let|if-not|import|in-ns|inc|index|insert-child|insert-left|insert-right|inspect-table|inspect-tree|instance\?|int|interleave|intersection|into|into-array|iterate|join|key|keys|keyword|keyword\?|last|lazy-cat|lazy-cons|left|lefts|let|line-seq|list|list\*|load|load-file|locking|long|loop|macroexpand|macroexpand-1|make-array|make-node|map|map-invert|map\?|mapcat|max|max-key|memfn|merge|merge-with|meta|min|min-key|monitor-enter|name|namespace|neg\?|new|newline|next|nil\?|node|not|not-any\?|not-every\?|not=|ns|ns-imports|ns-interns|ns-map|ns-name|ns-publics|ns-refers|ns-resolve|ns-unmap|nth|nthrest|or|parse|partial|path|peek|pop|pos\?|pr|pr-str|print|print-str|println|println-str|prn|prn-str|project|proxy|proxy-mappings|quot|quote|rand|rand-int|range|re-find|re-groups|re-matcher|re-matches|re-pattern|re-seq|read|read-line|recur|reduce|ref|ref-set|refer|rem|remove|remove-method|remove-ns|rename|rename-keys|repeat|replace|replicate|resolve|rest|resultset-seq|reverse|rfirst|right|rights|root|rrest|rseq|second|select|select-keys|send|send-off|seq|seq-zip|seq\?|set|set!|short|slurp|some|sort|sort-by|sorted-map|sorted-map-by|sorted-set|special-symbol\?|split-at|split-with|str|string\?|struct|struct-map|subs|subvec|symbol|symbol\?|sync|take|take-nth|take-while|test|throw|time|to-array|to-array-2d|tree-seq|true\?|try|union|up|update-proxy|val|vals|var|var-get|var-set|var\?|vector|vector-zip|vector\?|when|when-first|when-let|when-not|with-local-vars|with-meta|with-open|with-out-str|xml-seq|xml-zip|zero\?|zipmap|zipper)(?=[\s)]|$)/,
1121
lookbehind: true
1222
},
1323
'boolean': /\b(?:true|false|nil)\b/,
14-
'number': /\b[\da-f]+\b/i,
24+
'number': {
25+
pattern: /(^|[^\w$@])(?:\d+(?:[/.]\d+)?(?:e[+-]?\d+)?|0x[a-f0-9]+|[1-9]\d?r[a-z0-9]+)[lmn]?(?![\w$@])/i,
26+
lookbehind: true
27+
},
28+
'function': {
29+
pattern: /((?:^|[^'])\()[\w*+!?'<>=/.-]+(?=[\s)]|$)/,
30+
lookbehind: true
31+
},
32+
'operator': /[#@^`~]/,
1533
'punctuation': /[{}\[\](),]/
1634
};

components/prism-clojure.min.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
(foo args)
2+
3+
; not a function
4+
'(a b c)
5+
6+
----------------------------------------------------
7+
8+
[
9+
["punctuation", "("], ["function", "foo"], " args", ["punctuation", ")"],
10+
11+
["comment", "; not a function"],
12+
"\r\n'", ["punctuation", "("], "a b c", ["punctuation", ")"]
13+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
123
2+
01234
3+
0xFFF
4+
2r0101011
5+
8r52
6+
36r16
7+
1.0
8+
1M
9+
2/3
10+
0.6666666666666666
11+
36786883868216818816N
12+
13+
----------------------------------------------------
14+
15+
[
16+
["number", "123"],
17+
["number", "01234"],
18+
["number", "0xFFF"],
19+
["number", "2r0101011"],
20+
["number", "8r52"],
21+
["number", "36r16"],
22+
["number", "1.0"],
23+
["number", "1M"],
24+
["number", "2/3"],
25+
["number", "0.6666666666666666"],
26+
["number", "36786883868216818816N"]
27+
]

tests/languages/clojure/operator_and_punctuation.test

-20
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# @ ^ ` ~
2+
3+
----------------------------------------------------
4+
5+
[
6+
["operator", "#"],
7+
["operator", "@"],
8+
["operator", "^"],
9+
["operator", "`"],
10+
["operator", "~"]
11+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{ } [ ] ( )
2+
,
3+
4+
----------------------------------------------------
5+
6+
[
7+
["punctuation", "{"],
8+
["punctuation", "}"],
9+
["punctuation", "["],
10+
["punctuation", "]"],
11+
["punctuation", "("],
12+
["punctuation", ")"],
13+
14+
["punctuation", ","]
15+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
:foo
2+
:foo/bar-baz
3+
::foo
4+
5+
----------------------------------------------------
6+
7+
[
8+
["symbol", ":foo"],
9+
["symbol", ":foo/bar-baz"],
10+
["symbol", "::foo"]
11+
]

0 commit comments

Comments
 (0)