Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
131eddf
optimization for String::format
wenshao Sep 16, 2023
77d0d5a
remove print fast-path changes
wenshao Sep 17, 2023
32f9990
parse fast-path support more pattern
wenshao Sep 17, 2023
0607e08
parse fast-path support more pattern and add benchmark case
wenshao Sep 18, 2023
dbcebaa
bug fix
wenshao Sep 18, 2023
d78149f
parse fast-path support more pattern
wenshao Sep 18, 2023
7692a1c
remove unused comment
wenshao Sep 18, 2023
17ab583
parse fast-path support more specifiers
wenshao Sep 18, 2023
c28ab59
fix specifiers support '%<s'
wenshao Sep 19, 2023
0ccc6a6
fix specifiers duplicate flags not throw error
wenshao Sep 19, 2023
4d6d1e8
drop the regex code entirely and write a custom parser
wenshao Sep 20, 2023
a71031e
bug fix
wenshao Sep 20, 2023
3cebe97
bug fix for '%T' not throw error
wenshao Sep 20, 2023
f303f29
refactor
wenshao Sep 20, 2023
b3ca246
refactor & bug fix
wenshao Sep 20, 2023
59c2983
shared between Formatter and FormatProcessor
wenshao Sep 21, 2023
2042751
restore StringFormat
wenshao Sep 24, 2023
eef0ca6
add decimal benchmark
wenshao Sep 24, 2023
2153a22
Merge remote-tracking branch 'upstream/master' into optim_for_string_…
wenshao Sep 24, 2023
9f229b0
import BigDecimal
wenshao Sep 24, 2023
0d977b2
refactor and cache single conversion FormatSpecifier
wenshao Sep 24, 2023
7b831ab
Revert "refactor and cache single conversion FormatSpecifier"
wenshao Sep 24, 2023
155d004
fix logic error
wenshao Sep 24, 2023
f85b9d4
remove unused code
wenshao Sep 25, 2023
6be4d46
remove unused code
wenshao Sep 25, 2023
ba4660a
refactor for review & remove comment
wenshao Sep 25, 2023
eafac65
code format
wenshao Sep 27, 2023
8a6fe0e
fix : the exception thrown when the input does not include conversion…
wenshao Sep 27, 2023
3ff5121
Refactor according to rgiulietti's suggestion and add testcases
wenshao Sep 27, 2023
b19dc51
Fix from @rgiulietti review
wenshao Sep 28, 2023
ad7f3bd
Improve the readability of parseArgument, suggestion from @rgiulietti
wenshao Sep 28, 2023
7b3ce95
Improve the readability, suggestion from @rgiulietti
wenshao Oct 5, 2023
7a1cd11
add copyright info
wenshao Oct 5, 2023
134f2b2
move testcase from BasicInt to Basic-X
wenshao Oct 9, 2023
bce554a
move testcases to Basic.java
wenshao Oct 9, 2023
fd5a5f2
fix from @rgiulietti 's review
wenshao Oct 18, 2023
d8d3ef4
fix FormatterBuilder testcase handle lineSeparator on windows
wenshao Oct 18, 2023
abb9022
add document
wenshao Oct 20, 2023
289a024
fix from @rgiulietti 's review
wenshao Oct 20, 2023
9618d61
Merge remote-tracking branch 'upstream/master' into optim_for_string_…
wenshao Dec 21, 2023
a5f1a4f
Merge remote-tracking branch 'upstream/master' into optim_for_string_…
wenshao Jan 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 26 additions & 13 deletions src/java.base/share/classes/java/util/FormatProcessor.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -30,7 +31,6 @@
import java.lang.invoke.MethodType;
import java.lang.StringTemplate.Processor;
import java.lang.StringTemplate.Processor.Linkage;
import java.util.regex.Matcher;

import jdk.internal.javac.PreviewFeature;

Expand Down Expand Up @@ -218,22 +218,35 @@ public MethodHandle linkage(List<String> fragments, MethodType type) {
* @throws MissingFormatArgumentException if not at end or found and not needed
*/
private static boolean findFormat(String fragment, boolean needed) {
Matcher matcher = Formatter.FORMAT_SPECIFIER_PATTERN.matcher(fragment);
String group;

while (matcher.find()) {
group = matcher.group();
int max = fragment.length();
for (int i = 0; i < max;) {
int n = fragment.indexOf('%', i);
if (n < 0) {
return false;
}

if (!group.equals("%%") && !group.equals("%n")) {
if (matcher.end() == fragment.length() && needed) {
return true;
}
i = n + 1;
if (i >= max) {
return false;
}

throw new MissingFormatArgumentException(group +
" is not immediately followed by an embedded expression");
char c = fragment.charAt(i);
if (c == '%' || c == 'n') {
i++;
continue;
}
int off = new Formatter.FormatSpecifierParser(null, c, i, fragment, max)
.parse();
if (off == 0) {
return false;
}
if (i + off == max && needed) {
return true;
}
throw new MissingFormatArgumentException(
fragment.substring(i - 1, i + off)
+ " is not immediately followed by an embedded expression");
}

return false;
}

Expand Down
228 changes: 203 additions & 25 deletions src/java.base/share/classes/java/util/Formatter.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2003, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, Alibaba Group Holding Limited. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -49,8 +50,6 @@
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.text.spi.NumberFormatProvider;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import java.time.DateTimeException;
import java.time.Instant;
Expand Down Expand Up @@ -2810,20 +2809,14 @@ public Formatter format(Locale l, String format, Object ... args) {
return this;
}

// %[argument_index$][flags][width][.precision][t]conversion
static final String FORMAT_SPECIFIER
= "%(\\d+\\$)?([-#+ 0,(\\<]*)?(\\d+)?(\\.\\d+)?([tT])?([a-zA-Z%])";

static final Pattern FORMAT_SPECIFIER_PATTERN = Pattern.compile(FORMAT_SPECIFIER);

/**
* Finds format specifiers in the format string.
*/
static List<FormatString> parse(String s) {
FormatSpecifierParser parser = null;
ArrayList<FormatString> al = new ArrayList<>();
int i = 0;
int max = s.length();
Matcher m = null; // create if needed
while (i < max) {
int n = s.indexOf('%', i);
if (n < 0) {
Expand All @@ -2846,14 +2839,16 @@ static List<FormatString> parse(String s) {
al.add(new FormatSpecifier(c));
i++;
} else {
if (m == null) {
m = FORMAT_SPECIFIER_PATTERN.matcher(s);
}
// We have already parsed a '%' at n, so we either have a
// match or the specifier at n is invalid
if (m.find(n) && m.start() == n) {
al.add(new FormatSpecifier(s, m));
i = m.end();
if (parser == null) {
parser = new FormatSpecifierParser(al, c, i, s, max);
} else {
parser.reset(c, i);
}
int off = parser.parse();
if (off > 0) {
i += off;
} else {
throw new UnknownFormatConversionException(String.valueOf(c));
}
Expand All @@ -2862,6 +2857,159 @@ static List<FormatString> parse(String s) {
return al;
}

/**
 * Hand-written, regex-free parser for one format specifier. The parser is
 * positioned on the character that follows a {@code '%'} and walks the
 * optional argument index, flags, width and precision before the conversion.
 * An instance can be re-armed with {@link #reset(char, int)} to parse further
 * specifiers of the same string.
 */
static final class FormatSpecifierParser {
    /** Receives each parsed specifier; when {@code null} nothing is constructed. */
    final ArrayList<FormatString> al;
    /** The complete format string being scanned. */
    final String s;
    /** Exclusive upper bound for indexes into {@link #s}. */
    final int max;
    /** The character at {@link #start}, i.e. the one right after the '%'. */
    char first;
    /** Index of the first character after the '%'. */
    int start;
    /** Current scan position; always {@code >= start}. */
    int off;
    /** The character most recently examined by the scan. */
    char c;
    /** Length of the {@code argument_index$} part, including the '$'; 0 if absent. */
    int argSize;
    /** Number of flag characters; 0 if absent. */
    int flagSize;
    /** Number of width digits; 0 if absent. */
    int widthSize;

    FormatSpecifierParser(ArrayList<FormatString> al, char first, int start, String s, int max) {
        this.al = al;

        this.first = first;
        this.c = first;
        this.start = start;
        this.off = start;

        this.s = s;
        this.max = max;
    }

    /**
     * Re-arms this parser for another specifier of the same string.
     *
     * @param first the character right after the '%'
     * @param start the index of {@code first} in the string
     */
    void reset(char first, int start) {
        this.first = first;
        this.c = first;
        this.start = start;
        this.off = start;

        argSize = 0;
        flagSize = 0;
        widthSize = 0;
    }

    /**
     * If a valid format specifier is found, construct a FormatString and add it to {@link #al}
     * (nothing is constructed when {@code al} is {@code null}).
     * The format specifiers for general, character, and numeric types have
     * the following syntax:
     *
     * <blockquote><pre>
     *   %[argument_index$][flags][width][.precision]conversion
     * </pre></blockquote>
     *
     * As described by the following regular expression:
     *
     * <blockquote><pre>
     *   %(\d+\$)?([-#+ 0,(\<]*)?(\d+)?(\.\d+)?([tT])?([a-zA-Z%])
     * </pre></blockquote>
     *
     * @return the length of the format specifier, not counting the leading '%'.
     *         If no valid format specifier is found, 0 is returned.
     */
    int parse() {
        int precisionSize = 0;

        // (\d+\$)?
        parseArgument();

        // ([-#+ 0,(\<]*)?
        parseFlag();

        // (\d+)?
        parseWidth();

        if (c == '.') {
            // (\.\d+)?
            precisionSize = parsePrecision();
            if (precisionSize == -1) {
                // a '.' that is not followed by at least one digit
                return 0;
            }
        }

        // ([tT])?([a-zA-Z%])
        char t = '\0', conversion = '\0';
        if ((c == 't' || c == 'T') && off + 1 < max) {
            char c1 = s.charAt(off + 1);
            if (isConversion(c1)) {
                t = c;
                conversion = c1;
                off += 2;
            }
        }
        if (conversion == '\0') {
            // The date/time prefix is optional in the grammar: a 't'/'T' that is
            // not followed by a conversion letter is itself the conversion, just
            // like any other letter. It must not be dropped silently.
            if (!isConversion(c)) {
                return 0;
            }
            conversion = c;
            ++off;
        }

        // A conversion was consumed, so the specifier is syntactically complete.
        if (al != null) {
            FormatSpecifier formatSpecifier
                    = new FormatSpecifier(s, start, argSize, flagSize, widthSize, precisionSize, t, conversion);
            al.add(formatSpecifier);
        }
        return off - start;
    }

    /**
     * Consumes an optional {@code argument_index$}. When the digits are not
     * terminated by '$' nothing is consumed and {@link #c} is restored, so
     * the same digits can be re-read as flags and/or width.
     */
    private void parseArgument() {
        // (\d+\$)?
        int i = off;
        for (; i < max && isDigit(c = s.charAt(i)); ++i);  // empty body
        if (i == off || c != '$') {
            c = first;
            return;
        }

        i++; // skip '$'
        if (i < max) {
            c = s.charAt(i);
        }

        argSize = i - off;
        off = i;
    }

    /** Consumes zero or more flag characters and records their count. */
    private void parseFlag() {
        // ([-#+ 0,(\<]*)?
        int i = off;
        for (; i < max && Flags.isFlag(c = s.charAt(i)); ++i); // empty body
        flagSize = i - off;
        off = i;
    }

    /** Consumes zero or more width digits and records their count. */
    private void parseWidth() {
        // (\d+)?
        int i = off;
        for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
        widthSize = i - off;
        off = i;
    }

    /**
     * Consumes the precision digits; must be called with {@link #off} on the '.'.
     *
     * @return the precision length including the '.', or -1 if no digit follows it
     */
    private int parsePrecision() {
        int i = ++off; // step over '.'
        for (; i < max && isDigit(c = s.charAt(i)); ++i); // empty body
        if (i != off) {
            int size = i - off + 1; // + 1 accounts for the '.'
            off = i;
            return size;
        }
        return -1;
    }
}

/**
 * Tells whether {@code c} can stand in the conversion position of a format
 * specifier: an ASCII letter of either case, or the percent sign.
 */
static boolean isConversion(char c) {
    if (c == '%') {
        return true;
    }
    boolean lowerCase = 'a' <= c && c <= 'z';
    boolean upperCase = 'A' <= c && c <= 'Z';
    return lowerCase || upperCase;
}

/**
 * Tells whether {@code c} is one of the ASCII decimal digits '0' to '9'.
 */
private static boolean isDigit(char c) {
    int distance = c - '0';
    return distance >= 0 && distance <= 9;
}

interface FormatString {
int index();
void print(Formatter fmt, Object arg, Locale l) throws IOException;
Expand Down Expand Up @@ -2984,21 +3132,44 @@ private void conversion(char conv) {
}
}

FormatSpecifier(String s, Matcher m) {
index(s, m.start(1), m.end(1));
flags(s, m.start(2), m.end(2));
width(s, m.start(3), m.end(3));
precision(s, m.start(4), m.end(4));

int tTStart = m.start(5);
if (tTStart >= 0) {
FormatSpecifier(
String s,
int i,
int argSize,
int flagSize,
int widthSize,
int precisionSize,
char t,
char conversion
) {
int argEnd = i + argSize;
int flagEnd = argEnd + flagSize;
int widthEnd = flagEnd + widthSize;
int precisionEnd = widthEnd + precisionSize;

if (argSize > 0) {
index(s, i, argEnd);
}
if (flagSize > 0) {
flags(s, argEnd, flagEnd);
}
if (widthSize > 0) {
width(s, flagEnd, widthEnd);
}
if (precisionSize > 0) {
precision(s, widthEnd, precisionEnd);
}
if (t != '\0') {
dt = true;
if (s.charAt(tTStart) == 'T') {
if (t == 'T') {
flags = Flags.add(flags, Flags.UPPERCASE);
}
}
conversion(s.charAt(m.start(6)));
conversion(conversion);
check();
}

private void check() {
if (dt)
checkDateTime();
else if (Conversion.isGeneral(c))
Expand Down Expand Up @@ -4705,6 +4876,13 @@ private static int parse(char c) {
};
}

// Tells whether c is one of the flag characters that may follow the
// argument index in a format specifier: - # + space 0 , ( <
private static boolean isFlag(char c) {
    final String flagChars = "-#+ 0,(<";
    return flagChars.indexOf(c) >= 0;
}

// Returns a string representation of the current {@code Flags}.
public static String toString(int f) {
StringBuilder sb = new StringBuilder();
Expand Down
Loading