Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scheme: Fixed number pattern #2648

Merged
merged 1 commit into from
Nov 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 117 additions & 74 deletions components/prism-scheme.js
Original file line number Diff line number Diff line change
@@ -1,78 +1,121 @@
Prism.languages.scheme = {
// this supports "normal" single-line comments:
// ; comment
// and (potentially nested) multiline comments:
// #| comment #| nested |# still comment |#
// (only 1 level of nesting is supported)
'comment': /;.*|#;\s*\((?:[^()]|\([^()]*\))*\)|#\|(?:[^#|]|#(?!\|)|\|(?!#)|#\|(?:[^#|]|#(?!\|)|\|(?!#))*\|#)*\|#/,
'string': {
pattern: /"(?:[^"\\]|\\.)*"/,
greedy: true
},
'symbol': {
pattern: /'[^()#'\s]+/,
greedy: true
},
'character': {
pattern: /#\\(?:[ux][a-fA-F\d]+\b|[-a-zA-Z]+\b|\S)/,
greedy: true,
alias: 'string'
},
'lambda-parameter': [
// https://www.cs.cmu.edu/Groups/AI/html/r4rs/r4rs_6.html#SEC30
{
pattern: /((?:^|[^'`#])\(lambda\s+)(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)/,
(function (Prism) {
Prism.languages.scheme = {
// this supports "normal" single-line comments:
// ; comment
// and (potentially nested) multiline comments:
// #| comment #| nested |# still comment |#
// (only 1 level of nesting is supported)
'comment': /;.*|#;\s*\((?:[^()]|\([^()]*\))*\)|#\|(?:[^#|]|#(?!\|)|\|(?!#)|#\|(?:[^#|]|#(?!\|)|\|(?!#))*\|#)*\|#/,
'string': {
pattern: /"(?:[^"\\]|\\.)*"/,
greedy: true
},
'symbol': {
pattern: /'[^()#'\s]+/,
greedy: true
},
'character': {
pattern: /#\\(?:[ux][a-fA-F\d]+\b|[-a-zA-Z]+\b|\S)/,
greedy: true,
alias: 'string'
},
'lambda-parameter': [
// https://www.cs.cmu.edu/Groups/AI/html/r4rs/r4rs_6.html#SEC30
{
pattern: /((?:^|[^'`#])\(lambda\s+)(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)/,
lookbehind: true
},
{
pattern: /((?:^|[^'`#])\(lambda\s+\()[^()']+/,
lookbehind: true
}
],
'keyword': {
pattern: /((?:^|[^'`#])\()(?:begin|case(?:-lambda)?|cond(?:-expand)?|define(?:-library|-macro|-record-type|-syntax|-values)?|defmacro|delay(?:-force)?|do|else|export|except|guard|if|import|include(?:-ci|-library-declarations)?|lambda|let(?:rec)?(?:-syntax|-values|\*)?|let\*-values|only|parameterize|prefix|(?:quasi-?)?quote|rename|set!|syntax-(?:case|rules)|unless|unquote(?:-splicing)?|when)(?=[()\s]|$)/,
lookbehind: true
},
'builtin': {
// all functions of the base library of R7RS plus some of built-ins of R5Rs
pattern: /((?:^|[^'`#])\()(?:abs|and|append|apply|assoc|ass[qv]|binary-port\?|boolean=?\?|bytevector(?:-append|-copy|-copy!|-length|-u8-ref|-u8-set!|\?)?|caar|cadr|call-with-(?:current-continuation|port|values)|call\/cc|car|cdar|cddr|cdr|ceiling|char(?:->integer|-ready\?|\?|<\?|<=\?|=\?|>\?|>=\?)|close-(?:input-port|output-port|port)|complex\?|cons|current-(?:error|input|output)-port|denominator|dynamic-wind|eof-object\??|eq\?|equal\?|eqv\?|error|error-object(?:-irritants|-message|\?)|eval|even\?|exact(?:-integer-sqrt|-integer\?|\?)?|expt|features|file-error\?|floor(?:-quotient|-remainder|\/)?|flush-output-port|for-each|gcd|get-output-(?:bytevector|string)|inexact\??|input-port(?:-open\?|\?)|integer(?:->char|\?)|lcm|length|list(?:->string|->vector|-copy|-ref|-set!|-tail|\?)?|make-(?:bytevector|list|parameter|string|vector)|map|max|member|memq|memv|min|modulo|negative\?|newline|not|null\?|number(?:->string|\?)|numerator|odd\?|open-(?:input|output)-(?:bytevector|string)|or|output-port(?:-open\?|\?)|pair\?|peek-char|peek-u8|port\?|positive\?|procedure\?|quotient|raise|raise-continuable|rational\?|rationalize|read-(?:bytevector|bytevector!|char|error\?|line|string|u8)|real\?|remainder|reverse|round|set-c[ad]r!|square|string(?:->list|->number|->symbol|->utf8|->vector|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?|<\?|<=\?|=\?|>\?|>=\?)?|substring|symbol(?:->string|\?|=\?)|syntax-error|textual-port\?|truncate(?:-quotient|-remainder|\/)?|u8-ready\?|utf8->string|values|vector(?:->list|->string|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?)?|with-exception-handler|write-(?:bytevector|char|string|u8)|zero\?)(?=[()\s]|$)/,
lookbehind: true
},
'operator': {
pattern: /((?:^|[^'`#])\()(?:[-+*%/]|[<>]=?|=>?)(?=[()\s]|$)/,
lookbehind: true
},
{
pattern: /((?:^|[^'`#])\(lambda\s+\()[^()']+/,
'number': {
// The number pattern from [the R7RS spec](https://small.r7rs.org/attachment/r7rs.pdf).
//
// <number> := <num 2>|<num 8>|<num 10>|<num 16>
// <num R> := <prefix R><complex R>
// <complex R> := <real R>(?:@<real R>|<imaginary R>)?|<imaginary R>
// <imaginary R> := [+-](?:<ureal R>|(?:inf|nan)\.0)?i
// <real R> := [+-]?<ureal R>|[+-](?:inf|nan)\.0
// <ureal R> := <uint R>(?:\/<uint R>)?
// | <decimal R>
//
// <decimal 10> := (?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?
// <uint R> := <digit R>+
// <prefix R> := <radix R>(?:#[ei])?|(?:#[ei])?<radix R>
// <radix 2> := #b
// <radix 8> := #o
// <radix 10> := (?:#d)?
// <radix 16> := #x
// <digit 2> := [01]
// <digit 8> := [0-7]
// <digit 10> := \d
// <digit 16> := [0-9a-f]
//
// The problem with this grammar is that the resulting regex is way to complex, so we simplify by grouping all
// non-decimal bases together. This results in a decimal (dec) and combined binary, octal, and hexadecimal (box)
// pattern:
pattern: RegExp(SortedBNF({
'<ureal dec>': /\d+(?:\/\d+)?|(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?/.source,
'<real dec>': /[+-]?<ureal dec>|[+-](?:inf|nan)\.0/.source,
'<imaginary dec>': /[+-](?:<ureal dec>|(?:inf|nan)\.0)?i/.source,
'<complex dec>': /<real dec>(?:@<real dec>|<imaginary dec>)?|<imaginary dec>/.source,
'<num dec>': /(?:#d(?:#[ei])?|#[ei](?:#d)?)?<complex dec>/.source,

'<ureal box>': /[0-9a-f]+(?:\/[0-9a-f]+)?/.source,
'<real box>': /[+-]?<ureal box>|[+-](?:inf|nan)\.0/.source,
'<imaginary box>': /[+-](?:<ureal box>|(?:inf|nan)\.0)?i/.source,
'<complex box>': /<real box>(?:@<real box>|<imaginary box>)?|<imaginary box>/.source,
'<num box>': /#[box](?:#[ei])?|(?:#[ei])?#[box]<complex box>/.source,

'<number>': /(^|[\s()])(?:<num dec>|<num box>)(?=[()\s]|$)/.source,
}), 'i'),
lookbehind: true
},
'boolean': {
pattern: /(^|[\s()])#(?:[ft]|false|true)(?=[()\s]|$)/,
lookbehind: true
},
'function': {
pattern: /((?:^|[^'`#])\()(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)(?=[()\s]|$)/,
lookbehind: true
},
'identifier': {
pattern: /(^|[\s()])\|(?:[^\\|]|\\.)*\|(?=[()\s]|$)/,
lookbehind: true,
greedy: true
},
'punctuation': /[()']/
};

/**
* Given a topologically sorted BNF grammar, this will return the RegExp source of the last rule of the grammar.
*
* @param {Record<string, string>} grammar
* @returns {string}
*/
function SortedBNF(grammar) {
for (var key in grammar) {
grammar[key] = grammar[key].replace(/<[\w\s]+>/g, function (key) {
return '(?:' + grammar[key].trim() + ')';
});
}
],
'keyword': {
pattern: /((?:^|[^'`#])\()(?:begin|case(?:-lambda)?|cond(?:-expand)?|define(?:-library|-macro|-record-type|-syntax|-values)?|defmacro|delay(?:-force)?|do|else|export|except|guard|if|import|include(?:-ci|-library-declarations)?|lambda|let(?:rec)?(?:-syntax|-values|\*)?|let\*-values|only|parameterize|prefix|(?:quasi-?)?quote|rename|set!|syntax-(?:case|rules)|unless|unquote(?:-splicing)?|when)(?=[()\s]|$)/,
lookbehind: true
},
'builtin': {
// all functions of the base library of R7RS plus some of built-ins of R5Rs
pattern: /((?:^|[^'`#])\()(?:abs|and|append|apply|assoc|ass[qv]|binary-port\?|boolean=?\?|bytevector(?:-append|-copy|-copy!|-length|-u8-ref|-u8-set!|\?)?|caar|cadr|call-with-(?:current-continuation|port|values)|call\/cc|car|cdar|cddr|cdr|ceiling|char(?:->integer|-ready\?|\?|<\?|<=\?|=\?|>\?|>=\?)|close-(?:input-port|output-port|port)|complex\?|cons|current-(?:error|input|output)-port|denominator|dynamic-wind|eof-object\??|eq\?|equal\?|eqv\?|error|error-object(?:-irritants|-message|\?)|eval|even\?|exact(?:-integer-sqrt|-integer\?|\?)?|expt|features|file-error\?|floor(?:-quotient|-remainder|\/)?|flush-output-port|for-each|gcd|get-output-(?:bytevector|string)|inexact\??|input-port(?:-open\?|\?)|integer(?:->char|\?)|lcm|length|list(?:->string|->vector|-copy|-ref|-set!|-tail|\?)?|make-(?:bytevector|list|parameter|string|vector)|map|max|member|memq|memv|min|modulo|negative\?|newline|not|null\?|number(?:->string|\?)|numerator|odd\?|open-(?:input|output)-(?:bytevector|string)|or|output-port(?:-open\?|\?)|pair\?|peek-char|peek-u8|port\?|positive\?|procedure\?|quotient|raise|raise-continuable|rational\?|rationalize|read-(?:bytevector|bytevector!|char|error\?|line|string|u8)|real\?|remainder|reverse|round|set-c[ad]r!|square|string(?:->list|->number|->symbol|->utf8|->vector|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?|<\?|<=\?|=\?|>\?|>=\?)?|substring|symbol(?:->string|\?|=\?)|syntax-error|textual-port\?|truncate(?:-quotient|-remainder|\/)?|u8-ready\?|utf8->string|values|vector(?:->list|->string|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?)?|with-exception-handler|write-(?:bytevector|char|string|u8)|zero\?)(?=[()\s]|$)/,
lookbehind: true
},
'operator': {
pattern: /((?:^|[^'`#])\()(?:[-+*%/]|[<>]=?|=>?)(?=[()\s]|$)/,
lookbehind: true
},
'number': {
// This pattern (apart from the lookarounds) works like this:
//
// Decimal numbers
// <dec real> := \d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+
// <dec complex> := <dec real>(?:[+-]<dec real>i)?|<dec real>i
// <dec prefix> := (?:#d(?:#[ei])?|#[ei](?:#d)?)?
// <dec number> := <dec prefix>[+-]?<complex>
//
// Binary, octal, and hexadecimal numbers
// <b.o.x. real> := [\da-fA-F]+(?:\/[\da-fA-F]+)?
// <b.o.x. complex> := <b.o.x. real>(?:[+-]<b.o.x. real>i)?|<b.o.x. real>i
// <b.o.x. prefix> := #[box](?:#[ei])?|#[ei](?:#[box])?
// <b.o.x. number> := <b.o.x. prefix>[+-]?<b.o.x. complex>
//
// <number> := <dec number>|<b.o.x. number>
pattern: /(^|[\s()])(?:(?:#d(?:#[ei])?|#[ei](?:#d)?)?[+-]?(?:(?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)(?:[+-](?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)i)?|(?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)i)|(?:#[box](?:#[ei])?|#[ei](?:#[box])?)[+-]?(?:[\da-fA-F]+(?:\/[\da-fA-F]+)?(?:[+-][\da-fA-F]+(?:\/[\da-fA-F]+)?i)?|[\da-fA-F]+(?:\/[\da-fA-F]+)?i))(?=[()\s]|$)/,
lookbehind: true
},
'boolean': {
pattern: /(^|[\s()])#(?:[ft]|false|true)(?=[()\s]|$)/,
lookbehind: true
},
'function': {
pattern: /((?:^|[^'`#])\()(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)(?=[()\s]|$)/,
lookbehind: true
},
'identifier': {
pattern: /(^|[\s()])\|(?:[^\\|]|\\.)*\|(?=[()\s]|$)/,
lookbehind: true,
greedy: true
},
'punctuation': /[()']/
};
// return the last item
return grammar[key];
}

})(Prism);
2 changes: 1 addition & 1 deletion components/prism-scheme.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 18 additions & 5 deletions tests/languages/racket/number_feature.test
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
123

(foo 42 +42 -42)
(foo 1e3 +1e3 -1e3)
(foo 1e+3 1e-3 3.14159 3.14159e-1)
(foo 8/3)
(foo 3+4i 2.5+0.0i 2.5+0.0i -2.5e4+0.0e4i 3+0i -2e-5i)
(list 10i +10i -10i 10.10i 10+10i 10.10+10.10i 10-10i 10e+10i 10+10e+10i)
(list +10i -10i 10+10i 10.10+10.10i 10-10i 10+10e+10i)

(list #d123 #e#d123e-4 #d#i12 #i-1.234i)

(list #xBAD #b1110011 #o777)
(list #i#x10 #i#x10+10i #b10+10i)

10+i
10+.1i
10+1.i

; not a number but a symbol
(define 1+2 10)

Expand All @@ -19,6 +25,8 @@
----------------------------------------------------

[
["number", "123"],

["punctuation", "("],
["function", "foo"],
["number", "42"],
Expand Down Expand Up @@ -58,14 +66,11 @@

["punctuation", "("],
["builtin", "list"],
["number", "10i"],
["number", "+10i"],
["number", "-10i"],
["number", "10.10i"],
["number", "10+10i"],
["number", "10.10+10.10i"],
["number", "10-10i"],
["number", "10e+10i"],
["number", "10+10e+10i"],
["punctuation", ")"],

Expand All @@ -91,8 +96,16 @@
["number", "#b10+10i"],
["punctuation", ")"],

["number", "10+i"],
["number", "10+.1i"],
["number", "10+1.i"],

["comment", "; not a number but a symbol"],
["punctuation", "("], ["keyword", "define"], " 1+2 ", ["number", "10"], ["punctuation", ")"],
["punctuation", "("],
["keyword", "define"],
" 1+2 ",
["number", "10"],
["punctuation", ")"],

["punctuation", "["],
["function", "foo"],
Expand Down
Loading