diff --git a/src/languages/java.js b/src/languages/java.js index f349a76ad0..45b314c1a3 100644 --- a/src/languages/java.js +++ b/src/languages/java.js @@ -5,11 +5,12 @@ Category: common, enterprise Website: https://www.java.com/ */ +import * as regex from "../lib/regex"; + export default function(hljs) { var JAVA_IDENT_RE = '[\u00C0-\u02B8a-zA-Z_$][\u00C0-\u02B8a-zA-Z_$0-9]*'; var GENERIC_IDENT_RE = JAVA_IDENT_RE + '(<' + JAVA_IDENT_RE + '(\\s*,\\s*' + JAVA_IDENT_RE + ')*>)?'; - var KEYWORDS = - 'false synchronized int abstract float private char boolean var static null if const ' + + var KEYWORDS = 'false synchronized int abstract float private char boolean var static null if const ' + 'for true while long strictfp finally protected import native final void ' + 'enum else break transient catch instanceof byte super volatile case assert short ' + 'package default double public try this switch continue throws protected public private ' + @@ -18,32 +19,53 @@ export default function(hljs) { var ANNOTATION = { className: 'meta', begin: '@' + JAVA_IDENT_RE, - contains:[ + contains: [ { begin: /\(/, end: /\)/, contains: ["self"] // allow nested () inside our annotation }, ] - } - // https://docs.oracle.com/javase/7/docs/technotes/guides/language/underscores-literals.html - var JAVA_NUMBER_RE = '\\b' + - '(' + - '0[bB]([01]+[01_]+[01]+|[01]+)' + // 0b... - '|' + - '0[xX]([a-fA-F0-9]+[a-fA-F0-9_]+[a-fA-F0-9]+|[a-fA-F0-9]+)' + // 0x... - '|' + - '(' + - '([\\d]+[\\d_]+[\\d]+|[\\d]+)(\\.([\\d]+[\\d_]+[\\d]+|[\\d]+))?' + - '|' + - '\\.([\\d]+[\\d_]+[\\d]+|[\\d]+)' + - ')' + - '([eE][-+]?\\d+)?' 
+ // octal, decimal, float - ')' + - '[lLfF]?'; + }; + /** + * Pattern for one or more characters of the class seq, optionally with underscores between them (never leading or trailing) + * @type {(s: string | RegExp) => string} */ + var SEQUENCE_ALLOWING_UNDERSCORES = (seq) => regex.concat('[', seq, ']+([', seq, '_]*[', seq, ']+)?'); var JAVA_NUMBER_MODE = { className: 'number', - begin: JAVA_NUMBER_RE, + variants: [ + { begin: `\\b(0[bB]${SEQUENCE_ALLOWING_UNDERSCORES('01')})[lL]?` }, // binary, optional l/L suffix + { begin: `\\b(0${SEQUENCE_ALLOWING_UNDERSCORES('0-7')})[dDfFlL]?` }, // octal (leading 0), optional d/D/f/F/l/L suffix + { + begin: regex.concat( + /\b0[xX]/, + regex.either( + regex.concat(SEQUENCE_ALLOWING_UNDERSCORES('a-fA-F0-9'), /\./, SEQUENCE_ALLOWING_UNDERSCORES('a-fA-F0-9')), + regex.concat(SEQUENCE_ALLOWING_UNDERSCORES('a-fA-F0-9'), /\.?/), + regex.concat(/\./, SEQUENCE_ALLOWING_UNDERSCORES('a-fA-F0-9')), + ), + /([pP][+-]?(\d+))?/, + /[fFdDlL]?/ // hex: integer and floating-point suffixes mixed for simplicity + ) + }, + // scientific notation (the e/E exponent is mandatory in this variant) + { begin: regex.concat( + /\b/, + regex.either( + regex.concat(/\d*\./, SEQUENCE_ALLOWING_UNDERSCORES("\\d")), // 3.3, 3.3_3 (.3 only directly after a word character, because of the leading \b) + SEQUENCE_ALLOWING_UNDERSCORES("\\d") // 3, 3_3 + ), + /[eE][+-]?[\d]+[dDfF]?/) + }, + // decimal integers and floats (23, 5.4, 3.) mixed for simplicity + { begin: regex.concat( + /\b/, + SEQUENCE_ALLOWING_UNDERSCORES(/\d/), + regex.optional(/\.?/), + regex.optional(SEQUENCE_ALLOWING_UNDERSCORES(/\d/)), + /[dDfFlL]?/) + } + ], relevance: 0 }; @@ -57,15 +79,15 @@ export default function(hljs) { '/\\*\\*', '\\*/', { - relevance : 0, - contains : [ + relevance: 0, + contains: [ { // eat up @'s in emails to prevent them to be recognized as doctags begin: /\w+@/, relevance: 0 }, { - className : 'doctag', - begin : '@[A-Za-z]+' + className: 'doctag', + begin: '@[A-Za-z]+' } ] } @@ -80,7 +102,7 @@ export default function(hljs) { keywords: 'class interface', illegal: /[:"\[\]]/, contains: [ - {beginKeywords: 'extends implements'}, + { beginKeywords: 'extends implements' }, hljs.UNDERSCORE_TITLE_MODE ] }, diff --git a/src/lib/regex.js b/src/lib/regex.js index 
6f2a5967d9..53a9158ac7 100644 --- a/src/lib/regex.js +++ b/src/lib/regex.js @@ -25,6 +25,14 @@ export function lookahead(re) { return concat('(?=', re, ')'); } +/** + * @param {RegExp | string } re - expression to make optional + * @returns {string} re wrapped in a group that is followed by ? (assuming concat joins its arguments in order) + */ +export function optional(re) { + return concat('(', re, ')?'); +} + /** * @param {...(RegExp | string) } args * @returns {string} @@ -34,6 +42,18 @@ export function concat(...args) { return joined; } +/** + * Any of the passed expressions may match + * + * Creates a huge this | this | that | that match + * @param {(RegExp | string)[] } args + * @returns {string} the source() of every argument, joined with | inside one group + */ +export function either(...args) { + const joined = '(' + args.map((x) => source(x)).join("|") + ")"; + return joined; +} + /** * @param {RegExp} re * @returns {number} diff --git a/test/markup/java/numbers.expect.txt b/test/markup/java/numbers.expect.txt index 713579cf6f..7ba247f209 100644 --- a/test/markup/java/numbers.expect.txt +++ b/test/markup/java/numbers.expect.txt @@ -7,3 +7,29 @@ byte nybbles = 0b0010_0101; long bytes = 0b11010010_01101001_10010100_10010010; int n = 1234 + Contacts._ID; +float f = 0x1.4p2f; +double d = 0x.ep-6; +int octal = 0777; +float f = 2e3f; +double d = 1.2e4D; +a = 0x4fa6p2; +b = 0x.4p2; +c = 0xa.ffp3f; +d = 0x1.0p2F; +e = 0x1.0p2f; +f = 0x1p1; +g = 0x.3p4d; +h = 0x1.2ep5D; +i = 0x1.p2; +int i = 23; +byte mask = 0x0f; +int i = 4; +byte mask = 0xa; +float f = 5.4; +float f = 2e3; +int n = 0b1; +float f = 3.; +f = 3_3.; +// TODO: in the future +// float f = .2; +// f = .2_022; diff --git a/test/markup/java/numbers.txt b/test/markup/java/numbers.txt index f68a40c7ad..e31545f70e 100644 --- a/test/markup/java/numbers.txt +++ b/test/markup/java/numbers.txt @@ -7,3 +7,29 @@ long maxLong = 0x7fff_ffff_ffff_ffffL; byte nybbles = 0b0010_0101; long bytes = 0b11010010_01101001_10010100_10010010; int n = 1234 + Contacts._ID; +float f = 0x1.4p2f; +double d = 0x.ep-6; +int octal = 0777; +float f = 2e3f; +double d = 1.2e4D; +a = 0x4fa6p2; +b = 0x.4p2; +c = 
0xa.ffp3f; +d = 0x1.0p2F; +e = 0x1.0p2f; +f = 0x1p1; +g = 0x.3p4d; +h = 0x1.2ep5D; +i = 0x1.p2; +int i = 23; +byte mask = 0x0f; +int i = 4; +byte mask = 0xa; +float f = 5.4; +float f = 2e3; +int n = 0b1; +float f = 3.; +f = 3_3.; +// TODO: in the future +// float f = .2; +// f = .2_022;