Skip to content

Commit

Permalink
8314774: Optimize URLEncoder
Browse files Browse the repository at this point in the history
Reviewed-by: redestad, dfuchs
  • Loading branch information
Glavo authored and cl4es committed Sep 19, 2023
1 parent 7c5f2a2 commit f25c920
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 124 deletions.
74 changes: 31 additions & 43 deletions src/java.base/share/classes/java/net/URLEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.nio.charset.UnsupportedCharsetException ;
import java.util.BitSet;
import java.util.Objects;
import java.util.HexFormat;
import java.util.function.IntPredicate;

import jdk.internal.util.ImmutableBitSetPredicate;
Expand Down Expand Up @@ -81,7 +82,6 @@
*/
public class URLEncoder {
private static final IntPredicate DONT_NEED_ENCODING;
private static final int CASE_DIFF = ('a' - 'A');
private static final String DEFAULT_ENCODING_NAME;

static {
Expand Down Expand Up @@ -138,6 +138,11 @@ public class URLEncoder {
DEFAULT_ENCODING_NAME = StaticProperty.fileEncoding();
}

/**
 * Upper-case hexadecimal formatter used for percent-encoding. It is created
 * once because {@code withUpperCase()} returns a new copy on every call;
 * {@code HexFormat} instances are immutable and safe to share.
 */
private static final HexFormat UPPER_HEX = HexFormat.of().withUpperCase();

/**
 * Appends the percent-encoded form of a single byte to {@code out}: a
 * {@code '%'} followed by the two upper-case hexadecimal digits of {@code b}.
 *
 * @param out the builder that receives the three characters
 * @param b   the byte to encode
 */
private static void encodeByte(StringBuilder out, byte b) {
    out.append('%');
    UPPER_HEX.toHexDigits(out, b);
}

/**
* You can't call the constructor.
*/
Expand Down Expand Up @@ -222,20 +227,30 @@ public static String encode(String s, String enc)
public static String encode(String s, Charset charset) {
Objects.requireNonNull(charset, "charset");

boolean needToChange = false;
StringBuilder out = new StringBuilder(s.length());
int i;
for (i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (!DONT_NEED_ENCODING.test(c) || c == ' ') {
break;
}
}
if (i == s.length()) {
return s;
}

StringBuilder out = new StringBuilder(s.length() << 1);
CharArrayWriter charArrayWriter = new CharArrayWriter();
if (i > 0) {
out.append(s, 0, i);
}

for (int i = 0; i < s.length();) {
int c = s.charAt(i);
//System.out.println("Examining character: " + c);
while (i < s.length()) {
char c = s.charAt(i);
if (DONT_NEED_ENCODING.test(c)) {
if (c == ' ') {
c = '+';
needToChange = true;
}
//System.out.println("Storing: " + c);
out.append((char)c);
out.append(c);
i++;
} else {
// convert to external encoding before hex conversion
Expand All @@ -245,27 +260,14 @@ public static String encode(String s, Charset charset) {
* If this character represents the start of a Unicode
* surrogate pair, then pass in two characters. It's not
* clear what should be done if a byte reserved in the
* surrogate pairs range occurs outside of a legal
* surrogate pairs range occurs outside a legal
* surrogate pair. For now, just treat it as if it were
* any other character.
*/
if (c >= 0xD800 && c <= 0xDBFF) {
/*
System.out.println(Integer.toHexString(c)
+ " is high surrogate");
*/
if ( (i+1) < s.length()) {
int d = s.charAt(i+1);
/*
System.out.println("\tExamining "
+ Integer.toHexString(d));
*/
if (d >= 0xDC00 && d <= 0xDFFF) {
/*
System.out.println("\t"
+ Integer.toHexString(d)
+ " is low surrogate");
*/
if (Character.isHighSurrogate(c)) {
if ((i + 1) < s.length()) {
char d = s.charAt(i + 1);
if (Character.isLowSurrogate(d)) {
charArrayWriter.write(d);
i++;
}
Expand All @@ -274,29 +276,15 @@ public static String encode(String s, Charset charset) {
i++;
} while (i < s.length() && !DONT_NEED_ENCODING.test((c = s.charAt(i))));

charArrayWriter.flush();
String str = charArrayWriter.toString();
byte[] ba = str.getBytes(charset);
for (byte b : ba) {
out.append('%');
char ch = Character.forDigit((b >> 4) & 0xF, 16);
// converting to use uppercase letter as part of
// the hex value if ch is a letter.
if (Character.isLetter(ch)) {
ch -= CASE_DIFF;
}
out.append(ch);
ch = Character.forDigit(b & 0xF, 16);
if (Character.isLetter(ch)) {
ch -= CASE_DIFF;
}
out.append(ch);
encodeByte(out, b);
}
charArrayWriter.reset();
needToChange = true;
}
}

return (needToChange? out.toString() : s);
return out.toString();
}
}
125 changes: 44 additions & 81 deletions test/jdk/java/net/URLEncoder/SurrogatePairs.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -26,11 +26,18 @@
* @bug 4396708
* @summary Test URL encoder and decoder on a string that contains
* surrogate pairs.
*
* @run junit SurrogatePairs
*/

import java.io.*;
import java.net.*;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.MethodSource;

import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.stream.Collectors;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.jupiter.api.Assertions.*;

/*
* Surrogate pairs are two character Unicode sequences where the first
Expand All @@ -40,86 +47,42 @@
*/
public class SurrogatePairs {

static String[] testStrings = {"\uD800\uDC00",
"\uD800\uDFFF",
"\uDBFF\uDC00",
"\uDBFF\uDFFF",
"1\uDBFF\uDC00",
"@\uDBFF\uDC00",
"\uDBFF\uDC001",
"\uDBFF\uDC00@",
"\u0101\uDBFF\uDC00",
"\uDBFF\uDC00\u0101"
};

static String[] correctEncodings = {"%F0%90%80%80",
"%F0%90%8F%BF",
"%F4%8F%B0%80",
"%F4%8F%BF%BF",
"1%F4%8F%B0%80",
"%40%F4%8F%B0%80",
"%F4%8F%B0%801",
"%F4%8F%B0%80%40",
"%C4%81%F4%8F%B0%80",
"%F4%8F%B0%80%C4%81"
};

public static void main(String[] args) throws Exception {

for (int i=0; i < testStrings.length; i++) {
test(testStrings[i], correctEncodings[i]);
}
/**
 * Supplies the rows for the parameterized test. Each row is a two-element
 * array: the string to encode, followed by the URL encoding expected for it.
 *
 * @return the table of {input, expected encoding} pairs
 */
public static String[][] arguments() {
    String[][] cases = {
        // Well-formed surrogate pairs at the corners of the surrogate ranges.
        {"\uD800\uDC00", "%F0%90%80%80"},
        {"\uD800\uDFFF", "%F0%90%8F%BF"},
        {"\uDBFF\uDC00", "%F4%8F%B0%80"},
        {"\uDBFF\uDFFF", "%F4%8F%BF%BF"},

        // A surrogate pair next to other characters.
        {"1\uDBFF\uDC00", "1%F4%8F%B0%80"},
        {"@\uDBFF\uDC00", "%40%F4%8F%B0%80"},
        {"\uDBFF\uDC001", "%F4%8F%B0%801"},
        {"\uDBFF\uDC00@", "%F4%8F%B0%80%40"},
        {"\u0101\uDBFF\uDC00", "%C4%81%F4%8F%B0%80"},
        {"\uDBFF\uDC00\u0101", "%F4%8F%B0%80%C4%81"},

        // Surrogates that do not form a pair; each is expected as "%3F".
        {"\uDE0A\uD83D", "%3F%3F"},
        {"1\uDE0A\uD83D", "1%3F%3F"},
        {"@\uDE0A\uD83D", "%40%3F%3F"},
        {"1@1\uDE0A\uD800\uDC00 \uD83D", "1%401%3F%F0%90%80%80+%3F"}
    };
    return cases;
}

private static void test(String str, String correctEncoding)
throws Exception {

System.out.println("Unicode bytes of test string are: "
+ getHexBytes(str));

String encoded = URLEncoder.encode(str, "UTF-8");

System.out.println("URLEncoding is: " + encoded);

if (encoded.equals(correctEncoding))
System.out.println("The encoding is correct!");
else {
throw new Exception("The encoding is incorrect!" +
" It should be " + correctEncoding);
}

String decoded = URLDecoder.decode(encoded, "UTF-8");

System.out.println("Unicode bytes for URLDecoding are: "
+ getHexBytes(decoded));

if (str.equals(decoded))
System.out.println("The decoding is correct");
else {
throw new Exception("The decoded is not equal to the original");
}
System.out.println("---");
/**
 * Encodes {@code str} with UTF-8 and checks the result against
 * {@code correctEncoding}, then decodes the encoded form and checks that it
 * round-trips.
 *
 * @param str             the string to encode
 * @param correctEncoding the URL encoding expected for {@code str}
 */
@ParameterizedTest
@MethodSource("arguments")
public void test(String str, String correctEncoding) {
    String encoded = URLEncoder.encode(str, UTF_8);
    assertEquals(correctEncoding, encoded, () ->
            "str=%s, expected=%s, actual=%s"
                    .formatted(escape(str), escape(correctEncoding), escape(encoded)));

    // Map unmappable characters to '?': the decoded text can only match the
    // input after the same substitution has been applied to it.
    String cleanStr = new String(str.getBytes(UTF_8), UTF_8);
    String decoded = URLDecoder.decode(encoded, UTF_8);
    // Report the value actually compared against (cleanStr), not the raw input,
    // so a failure message is accurate for inputs with unpaired surrogates.
    assertEquals(cleanStr, decoded, () ->
            "str=%s, expected=%s, actual=%s"
                    .formatted(escape(str), escape(cleanStr), escape(decoded)));
}

private static String getHexBytes(String s) throws Exception {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < s.length(); i++) {

int a = s.charAt(i);
int b1 = (a >>8) & 0xff;
int b2 = (byte)a;
int b11 = (b1>>4) & 0x0f;
int b12 = b1 & 0x0f;
int b21 = (b2 >>4) & 0x0f;
int b22 = b2 & 0x0f;

sb.append(Integer.toHexString(b11));
sb.append(Integer.toHexString(b12));
sb.append(Integer.toHexString(b21));
sb.append(Integer.toHexString(b22));
sb.append(' ');
}
return sb.toString();
/**
 * Renders every UTF-16 code unit of {@code s} as a backslash, the letter
 * {@code u} and at least four lower-case hexadecimal digits, so that
 * surrogates and other non-printable characters stay readable in
 * assertion messages.
 *
 * @param s the string to render
 * @return the concatenated escape sequences, one per {@code char} of {@code s}
 */
private static String escape(String s) {
    int length = s.length();
    StringBuilder rendered = new StringBuilder(length * 6);
    for (int index = 0; index < length; index++) {
        int codeUnit = s.charAt(index);
        rendered.append(String.format("\\u%04x", codeUnit));
    }
    return rendered.toString();
}

}

1 comment on commit f25c920

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.