diff --git a/src/languages/java.js b/src/languages/java.js
index f349a76ad0..45b314c1a3 100644
--- a/src/languages/java.js
+++ b/src/languages/java.js
@@ -5,11 +5,12 @@ Category: common, enterprise
Website: https://www.java.com/
*/
+import * as regex from "../lib/regex";
+
export default function(hljs) {
var JAVA_IDENT_RE = '[\u00C0-\u02B8a-zA-Z_$][\u00C0-\u02B8a-zA-Z_$0-9]*';
var GENERIC_IDENT_RE = JAVA_IDENT_RE + '(<' + JAVA_IDENT_RE + '(\\s*,\\s*' + JAVA_IDENT_RE + ')*>)?';
- var KEYWORDS =
- 'false synchronized int abstract float private char boolean var static null if const ' +
+ var KEYWORDS = 'false synchronized int abstract float private char boolean var static null if const ' +
'for true while long strictfp finally protected import native final void ' +
'enum else break transient catch instanceof byte super volatile case assert short ' +
'package default double public try this switch continue throws protected public private ' +
@@ -18,32 +19,53 @@ export default function(hljs) {
var ANNOTATION = {
className: 'meta',
begin: '@' + JAVA_IDENT_RE,
- contains:[
+ contains: [
{
begin: /\(/,
end: /\)/,
contains: ["self"] // allow nested () inside our annotation
},
]
- }
- // https://docs.oracle.com/javase/7/docs/technotes/guides/language/underscores-literals.html
- var JAVA_NUMBER_RE = '\\b' +
- '(' +
- '0[bB]([01]+[01_]+[01]+|[01]+)' + // 0b...
- '|' +
- '0[xX]([a-fA-F0-9]+[a-fA-F0-9_]+[a-fA-F0-9]+|[a-fA-F0-9]+)' + // 0x...
- '|' +
- '(' +
- '([\\d]+[\\d_]+[\\d]+|[\\d]+)(\\.([\\d]+[\\d_]+[\\d]+|[\\d]+))?' +
- '|' +
- '\\.([\\d]+[\\d_]+[\\d]+|[\\d]+)' +
- ')' +
- '([eE][-+]?\\d+)?' + // octal, decimal, float
- ')' +
- '[lLfF]?';
+ };
+ /**
+ * A given sequence, possibly with underscores
+ * @type {(s: string | RegExp) => string} */
+ var SEQUENCE_ALLOWING_UNDERSCORES = (seq) => regex.concat('[', seq, ']+([', seq, '_]*[', seq, ']+)?');
var JAVA_NUMBER_MODE = {
className: 'number',
- begin: JAVA_NUMBER_RE,
+ variants: [
+ { begin: `\\b(0[bB]${SEQUENCE_ALLOWING_UNDERSCORES('01')})[lL]?` }, // binary
+ { begin: `\\b(0${SEQUENCE_ALLOWING_UNDERSCORES('0-7')})[dDfFlL]?` }, // octal
+ {
+ begin: regex.concat(
+ /\b0[xX]/,
+ regex.either(
+ regex.concat(SEQUENCE_ALLOWING_UNDERSCORES('a-fA-F0-9'), /\./, SEQUENCE_ALLOWING_UNDERSCORES('a-fA-F0-9')),
+ regex.concat(SEQUENCE_ALLOWING_UNDERSCORES('a-fA-F0-9'), /\.?/),
+ regex.concat(/\./, SEQUENCE_ALLOWING_UNDERSCORES('a-fA-F0-9')),
+ ),
+ /([pP][+-]?(\d+))?/,
+ /[fFdDlL]?/ // decimal & fp mixed for simplicity
+ )
+ },
+ // scientific notation
+ { begin: regex.concat(
+ /\b/,
+ regex.either(
+ regex.concat(/\d*\./, SEQUENCE_ALLOWING_UNDERSCORES("\\d")), // .3, 3.3, 3.3_3
+ SEQUENCE_ALLOWING_UNDERSCORES("\\d") // 3, 3_3
+ ),
+ /[eE][+-]?[\d]+[dDfF]?/)
+ },
+ // decimal & fp mixed for simplicity
+ { begin: regex.concat(
+ /\b/,
+ SEQUENCE_ALLOWING_UNDERSCORES(/\d/),
+ regex.optional(/\.?/),
+ regex.optional(SEQUENCE_ALLOWING_UNDERSCORES(/\d/)),
+ /[dDfFlL]?/)
+ }
+ ],
relevance: 0
};
@@ -57,15 +79,15 @@ export default function(hljs) {
'/\\*\\*',
'\\*/',
{
- relevance : 0,
- contains : [
+ relevance: 0,
+ contains: [
{
// eat up @'s in emails to prevent them to be recognized as doctags
begin: /\w+@/, relevance: 0
},
{
- className : 'doctag',
- begin : '@[A-Za-z]+'
+ className: 'doctag',
+ begin: '@[A-Za-z]+'
}
]
}
@@ -80,7 +102,7 @@ export default function(hljs) {
keywords: 'class interface',
illegal: /[:"\[\]]/,
contains: [
- {beginKeywords: 'extends implements'},
+ { beginKeywords: 'extends implements' },
hljs.UNDERSCORE_TITLE_MODE
]
},
diff --git a/src/lib/regex.js b/src/lib/regex.js
index 6f2a5967d9..53a9158ac7 100644
--- a/src/lib/regex.js
+++ b/src/lib/regex.js
@@ -25,6 +25,14 @@ export function lookahead(re) {
return concat('(?=', re, ')');
}
+/**
+ * @param {RegExp | string } re - the pattern to make optional
+ * @returns {string} result of concat('(', re, ')?'): `re` inside a parenthesized group followed by `?`
+ */
+export function optional(re) {
+ return concat('(', re, ')?');
+}
+
/**
* @param {...(RegExp | string) } args
* @returns {string}
@@ -34,6 +42,18 @@ export function concat(...args) {
return joined;
}
+/**
+ * Any of the passed expressions may match
+ *
+ * Creates a huge this | this | that | that match
+ * @param {...(RegExp | string) } args
+ * @returns {string} source(x) of every argument, joined with "|" and wrapped in one pair of parentheses
+ */
+export function either(...args) {
+ const joined = '(' + args.map((x) => source(x)).join("|") + ")";
+ return joined;
+}
+
/**
* @param {RegExp} re
* @returns {number}
diff --git a/test/markup/java/numbers.expect.txt b/test/markup/java/numbers.expect.txt
index 713579cf6f..7ba247f209 100644
--- a/test/markup/java/numbers.expect.txt
+++ b/test/markup/java/numbers.expect.txt
@@ -7,3 +7,29 @@
byte nybbles = 0b0010_0101;
long bytes = 0b11010010_01101001_10010100_10010010;
int n = 1234 + Contacts._ID;
+float f = 0x1.4p2f;
+double d = 0x.ep-6;
+int octal = 0777;
+float f = 2e3f;
+double d = 1.2e4D;
+a = 0x4fa6p2;
+b = 0x.4p2;
+c = 0xa.ffp3f;
+d = 0x1.0p2F;
+e = 0x1.0p2f;
+f = 0x1p1;
+g = 0x.3p4d;
+h = 0x1.2ep5D;
+i = 0x1.p2;
+int i = 23;
+byte mask = 0x0f;
+int i = 4;
+byte mask = 0xa;
+float f = 5.4;
+float f = 2e3;
+int n = 0b1;
+float f = 3.;
+f = 3_3.;
+
+
+
diff --git a/test/markup/java/numbers.txt b/test/markup/java/numbers.txt
index f68a40c7ad..e31545f70e 100644
--- a/test/markup/java/numbers.txt
+++ b/test/markup/java/numbers.txt
@@ -7,3 +7,29 @@ long maxLong = 0x7fff_ffff_ffff_ffffL;
byte nybbles = 0b0010_0101;
long bytes = 0b11010010_01101001_10010100_10010010;
int n = 1234 + Contacts._ID;
+float f = 0x1.4p2f;
+double d = 0x.ep-6;
+int octal = 0777;
+float f = 2e3f;
+double d = 1.2e4D;
+a = 0x4fa6p2;
+b = 0x.4p2;
+c = 0xa.ffp3f;
+d = 0x1.0p2F;
+e = 0x1.0p2f;
+f = 0x1p1;
+g = 0x.3p4d;
+h = 0x1.2ep5D;
+i = 0x1.p2;
+int i = 23;
+byte mask = 0x0f;
+int i = 4;
+byte mask = 0xa;
+float f = 5.4;
+float f = 2e3;
+int n = 0b1;
+float f = 3.;
+f = 3_3.;
+// TODO: in the future
+// float f = .2;
+// f = .2_022;