From c1f5ca115d514327f4c3681e61663e22b686f8c7 Mon Sep 17 00:00:00 2001 From: "Archie L. Cobbs" Date: Tue, 28 Mar 2023 16:14:37 +0000 Subject: [PATCH] 8303623: Compiler should disallow non-standard UTF-8 string encodings Reviewed-by: vromero --- .../com/sun/tools/javac/code/Source.java | 1 + .../sun/tools/javac/comp/LambdaToMethod.java | 7 +- .../com/sun/tools/javac/comp/Lower.java | 2 +- .../com/sun/tools/javac/jvm/ClassFile.java | 21 +- .../com/sun/tools/javac/jvm/ClassReader.java | 90 +++++-- .../sun/tools/javac/jvm/ModuleNameReader.java | 10 +- .../com/sun/tools/javac/jvm/PoolReader.java | 67 +++-- .../com/sun/tools/javac/jvm/PoolWriter.java | 11 +- .../tools/javac/resources/compiler.properties | 9 + .../com/sun/tools/javac/util/ByteBuffer.java | 5 +- .../com/sun/tools/javac/util/Convert.java | 191 +++++++++---- .../tools/javac/util/InvalidUtfException.java | 54 ++++ .../com/sun/tools/javac/util/Name.java | 34 ++- .../com/sun/tools/javac/util/Names.java | 14 +- .../sun/tools/javac/util/SharedNameTable.java | 4 +- .../tools/javac/util/UnsharedNameTable.java | 10 +- .../classreader/InvalidModifiedUtf8Test.java | 252 ++++++++++++++++++ .../tools/javac/diags/examples.not-yet.txt | 2 + 18 files changed, 663 insertions(+), 121 deletions(-) create mode 100644 src/jdk.compiler/share/classes/com/sun/tools/javac/util/InvalidUtfException.java create mode 100644 test/langtools/tools/javac/classreader/InvalidModifiedUtf8Test.java diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java index d5a452b661259..a8d99141ac608 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/code/Source.java @@ -236,6 +236,7 @@ public enum Feature { REDUNDANT_STRICTFP(JDK17), UNCONDITIONAL_PATTERN_IN_INSTANCEOF(JDK19, Fragments.FeatureUnconditionalPatternsInInstanceof, DiagKind.PLURAL), RECORD_PATTERNS(JDK19, 
Fragments.FeatureDeconstructionPatterns, DiagKind.PLURAL), + WARN_ON_ILLEGAL_UTF8(MIN, JDK21), ; enum DiagKind { diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/LambdaToMethod.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/LambdaToMethod.java index 90ec561474920..cf8579539128b 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/LambdaToMethod.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/LambdaToMethod.java @@ -2453,7 +2453,12 @@ protected void append(char ch) { @Override protected void append(byte[] ba) { - Name name = names.fromUtf(ba); + Name name; + try { + name = names.fromUtf(ba); + } catch (InvalidUtfException e) { + throw new AssertionError(e); + } sb.append(name.toString()); } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/Lower.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/Lower.java index 81f4f8d9890ed..de826507f7fc1 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/Lower.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/comp/Lower.java @@ -551,7 +551,7 @@ class RuntimeEnumMapping implements EnumMapping { .fromString(target.syntheticNameChar() + "SwitchMap" + target.syntheticNameChar() + - names.fromUtf(ClassWriter.externalize(forEnum.type.tsym.flatName())).toString() + ClassWriter.externalize(forEnum.type.tsym.flatName().toString()) .replace('/', '.') .replace('.', target.syntheticNameChar())); ClassSymbol outerCacheClass = outerCacheClass(); diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassFile.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassFile.java index 9805e5649764d..f740e528b274f 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassFile.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassFile.java @@ -106,6 +106,7 @@ public class ClassFile { public enum Version { V45_3(45, 3), // base level for all attributes + V48(48, 0), 
// JDK 1.4 V49(49, 0), // JDK 1.5: enum, generics, annotations V50(50, 0), // JDK 1.6: stackmaps V51(51, 0), // JDK 1.7 @@ -165,24 +166,18 @@ public static byte[] internalize(byte[] buf, int offset, int len) { * Note: the naming is the inverse of that used by JVMS 4.2 The Internal Form Of Names, * which defines "internal name" to be the form using "/" instead of "." */ - public static byte[] internalize(Name name) { - return internalize(name.getByteArray(), name.getByteOffset(), name.getByteLength()); + public static Name internalize(Name name) { + return name.table.names.fromString(name.toString().replace('/', '.')); } /** - * Return external representation of buf[offset..offset+len-1], converting '.' to '/'. + * Return external representation of given name, converting '.' to '/'. * * Note: the naming is the inverse of that used by JVMS 4.2 The Internal Form Of Names, * which defines "internal name" to be the form using "/" instead of "." */ - public static byte[] externalize(byte[] buf, int offset, int len) { - byte[] translated = new byte[len]; - for (int j = 0; j < len; j++) { - byte b = buf[offset + j]; - if (b == '.') translated[j] = (byte) '/'; - else translated[j] = b; - } - return translated; + public static Name externalize(Name name) { + return name.table.names.fromString(externalize(name.toString())); } /** @@ -191,7 +186,7 @@ public static byte[] externalize(byte[] buf, int offset, int len) { * Note: the naming is the inverse of that used by JVMS 4.2 The Internal Form Of Names, * which defines "internal name" to be the form using "/" instead of "."
*/ - public static byte[] externalize(Name name) { - return externalize(name.getByteArray(), name.getByteOffset(), name.getByteLength()); + public static String externalize(String name) { + return name.replace('.', '/'); } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassReader.java index 8a5458f74d2f6..ace1434921416 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassReader.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassReader.java @@ -43,6 +43,7 @@ import javax.tools.JavaFileManager; import javax.tools.JavaFileObject; +import com.sun.tools.javac.code.Source; import com.sun.tools.javac.code.Source.Feature; import com.sun.tools.javac.comp.Annotate; import com.sun.tools.javac.comp.Annotate.AnnotationTypeCompleter; @@ -65,6 +66,7 @@ import com.sun.tools.javac.util.ByteBuffer.UnderflowException; import com.sun.tools.javac.util.DefinedBy.Api; import com.sun.tools.javac.util.JCDiagnostic.DiagnosticPosition; +import com.sun.tools.javac.util.JCDiagnostic.Fragment; import static com.sun.tools.javac.code.Flags.*; import static com.sun.tools.javac.code.Kinds.Kind.*; @@ -118,6 +120,10 @@ public class ClassReader { */ boolean lintClassfile; + /** Switch: warn (instead of error) on illegal UTF-8 + */ + boolean warnOnIllegalUtf8; + /** Switch: preserve parameter names from the variable table. */ public boolean saveParameterNames; @@ -190,6 +196,9 @@ public class ClassReader { /** The minor version number of the class file being read. */ int minorVersion; + /** UTF-8 validation level */ + Convert.Validation utf8validation; + /** A table to hold the constant pool indices for method parameter * names, as given in LocalVariableTable attributes. 
*/ @@ -281,6 +290,7 @@ protected ClassReader(Context context) { allowModules = Feature.MODULES.allowedInSource(source); allowRecords = Feature.RECORDS.allowedInSource(source); allowSealedTypes = Feature.SEALED_CLASSES.allowedInSource(source); + warnOnIllegalUtf8 = Feature.WARN_ON_ILLEGAL_UTF8.allowedInSource(source); saveParameterNames = options.isSet(PARAMETERS); @@ -307,10 +317,18 @@ private void enterMember(ClassSymbol c, Symbol sym) { ***********************************************************************/ public ClassFinder.BadClassFile badClassFile(String key, Object... args) { + return badClassFile(diagFactory.fragment(key, args)); + } + + public ClassFinder.BadClassFile badClassFile(Fragment fragment) { + return badClassFile(diagFactory.fragment(fragment)); + } + + public ClassFinder.BadClassFile badClassFile(JCDiagnostic diagnostic) { return new ClassFinder.BadClassFile ( currentOwner.enclClass(), currentClassFile, - diagFactory.fragment(key, args), + diagnostic, diagFactory, dcfh); } @@ -335,7 +353,7 @@ char nextChar() { try { res = buf.getChar(bp); } catch (UnderflowException e) { - throw badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } bp += 2; return res; @@ -347,7 +365,7 @@ int nextByte() { try { return buf.getByte(bp++) & 0xFF; } catch (UnderflowException e) { - throw badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } } @@ -358,7 +376,7 @@ int nextInt() { try { res = buf.getInt(bp); } catch (UnderflowException e) { - throw badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } bp += 4; return res; @@ -455,7 +473,7 @@ Type sigToType() { sigp++; return sigEnterPhase ? 
Type.noType - : findTypeVar(names.fromUtf(signature, start, sigp - 1 - start)); + : findTypeVar(readName(signature, start, sigp - 1 - start)); case '+': { sigp++; Type t = sigToType(); @@ -539,8 +557,7 @@ Type sigToType() { typevars = typevars.leave(); return poly; default: - throw badClassFile("bad.signature", - Convert.utf2string(signature, sigp, 10)); + throw badClassFile("bad.signature", quoteBadSignature()); } } @@ -550,8 +567,7 @@ Type sigToType() { */ Type classSigToType() { if (signature[sigp] != 'L') - throw badClassFile("bad.class.signature", - Convert.utf2string(signature, sigp, 10)); + throw badClassFile("bad.class.signature", quoteBadSignature()); sigp++; Type outer = Type.noType; int startSbp = sbp; @@ -561,7 +577,7 @@ Type classSigToType() { switch (c) { case ';': { // end - ClassSymbol t = enterClass(names.fromUtf(signatureBuffer, + ClassSymbol t = enterClass(readName(signatureBuffer, startSbp, sbp - startSbp)); @@ -575,7 +591,7 @@ Type classSigToType() { } case '<': // generic arguments - ClassSymbol t = enterClass(names.fromUtf(signatureBuffer, + ClassSymbol t = enterClass(readName(signatureBuffer, startSbp, sbp - startSbp)); outer = new ClassType(outer, sigToTypes('>'), t) { @@ -638,7 +654,7 @@ public void setEnclosingType(Type outer) { case '.': //we have seen an enclosing non-generic class if (outer != Type.noType) { - t = enterClass(names.fromUtf(signatureBuffer, + t = enterClass(readName(signatureBuffer, startSbp, sbp - startSbp)); outer = new ClassType(outer, List.nil(), t); @@ -655,6 +671,20 @@ public void setEnclosingType(Type outer) { } } + /** Quote a bogus signature for display inside an error message. 
+ */ + String quoteBadSignature() { + String sigString; + try { + sigString = Convert.utf2string(signature, sigp, siglimit - sigp, Convert.Validation.NONE); + } catch (InvalidUtfException e) { + throw new AssertionError(e); + } + if (sigString.length() > 32) + sigString = sigString.substring(0, 32) + "..."; + return "\"" + sigString + "\""; + } + /** Convert (implicit) signature to list of types * until `terminator' is encountered. */ @@ -701,7 +731,7 @@ List sigToTypeParams() { Type sigToTypeParam() { int start = sigp; while (signature[sigp] != ':') sigp++; - Name name = names.fromUtf(signature, start, sigp - start); + Name name = readName(signature, start, sigp - start); TypeVar tvar; if (sigEnterPhase) { tvar = new TypeVar(name, currentOwner, syms.botType); @@ -752,6 +782,19 @@ Type findTypeVar(Name name) { } } + private Name readName(byte[] buf, int off, int len) { + try { + return names.fromUtf(buf, off, len, utf8validation); + } catch (InvalidUtfException e) { + if (warnOnIllegalUtf8) { + log.warning(Warnings.InvalidUtf8InClassfile(currentClassFile, + Fragments.BadUtf8ByteSequenceAt(sigp))); + return names.fromUtfLax(buf, off, len); + } + throw badClassFile(Fragments.BadUtf8ByteSequenceAt(sigp)); + } + } + /************************************************************************ * Reading Attributes ***********************************************************************/ @@ -1106,7 +1149,7 @@ protected void read(Symbol sym, int attrLen) { ModuleSymbol msym = (ModuleSymbol) sym.owner; ListBuffer directives = new ListBuffer<>(); - Name moduleName = poolReader.peekModuleName(nextChar(), names::fromUtf); + Name moduleName = poolReader.peekModuleName(nextChar(), ClassReader.this::readName); if (currentModule.name != moduleName) { throw badClassFile("module.name.mismatch", moduleName, currentModule.name); } @@ -1201,8 +1244,18 @@ protected void read(Symbol sym, int attrLen) { } } - private Name classNameMapper(byte[] arr, int offset, int length) { - return 
names.fromUtf(ClassFile.internalize(arr, offset, length)); + private Name classNameMapper(byte[] arr, int offset, int length) throws InvalidUtfException { + byte[] buf = ClassFile.internalize(arr, offset, length); + try { + return names.fromUtf(buf, 0, buf.length, utf8validation); + } catch (InvalidUtfException e) { + if (warnOnIllegalUtf8) { + log.warning(Warnings.InvalidUtf8InClassfile(currentClassFile, + Fragments.BadUtf8ByteSequenceAt(e.getOffset()))); + return names.fromUtfLax(buf, 0, buf.length); + } + throw e; + } } }, @@ -1502,7 +1555,7 @@ void readParameterAnnotations(Symbol meth) { try { numParameters = buf.getByte(bp++) & 0xFF; } catch (UnderflowException e) { - throw badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } if (parameterAnnotations == null) { parameterAnnotations = new ParameterAnnotations[numParameters]; @@ -1796,7 +1849,7 @@ Attribute readAttributeValue() { try { c = (char)buf.getByte(bp++); } catch (UnderflowException e) { - throw badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } switch (c) { case 'B': @@ -2659,6 +2712,7 @@ private void readClassBuffer(ClassSymbol c) throws IOException { Integer.toString(maxMajor), Integer.toString(maxMinor)); } + utf8validation = majorVersion < V48.major ? 
Convert.Validation.PREJDK14 : Convert.Validation.STRICT; if (previewClassFile) { if (!preview.isEnabled()) { diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ModuleNameReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ModuleNameReader.java index a34246cabdb64..b4c84293a2074 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ModuleNameReader.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ModuleNameReader.java @@ -157,11 +157,13 @@ int nextInt() throws BadClassFile { return res; } - NameMapper utf8Mapper(boolean internalize) { + PoolReader.Utf8Mapper utf8Mapper(boolean internalize) { return internalize ? - (buf, offset, len) -> - Convert.utf2string(ClassFile.internalize(buf, offset, len)) : - Convert::utf2string; + (buf, offset, len) -> { + buf = ClassFile.internalize(buf, offset, len); + return Convert.utf2string(buf, 0, buf.length, Convert.Validation.STRICT); + } : + (buf, offset, len) -> Convert.utf2string(buf, offset, len, Convert.Validation.STRICT); } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/PoolReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/PoolReader.java index 5607105bba3ab..c4ae9a20e0b13 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/PoolReader.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/PoolReader.java @@ -25,16 +25,20 @@ package com.sun.tools.javac.jvm; +import com.sun.tools.javac.code.Source; import com.sun.tools.javac.code.Symbol.ClassSymbol; import com.sun.tools.javac.code.Symbol.ModuleSymbol; import com.sun.tools.javac.code.Symbol.PackageSymbol; import com.sun.tools.javac.code.Symtab; import com.sun.tools.javac.code.Type; import com.sun.tools.javac.jvm.PoolConstant.NameAndType; +import com.sun.tools.javac.resources.CompilerProperties.Fragments; +import com.sun.tools.javac.resources.CompilerProperties.Warnings; import com.sun.tools.javac.util.ByteBuffer; import 
com.sun.tools.javac.util.ByteBuffer.UnderflowException; +import com.sun.tools.javac.util.Convert; +import com.sun.tools.javac.util.InvalidUtfException; import com.sun.tools.javac.util.Name; -import com.sun.tools.javac.util.Name.NameMapper; import com.sun.tools.javac.util.Names; import java.util.Arrays; @@ -74,6 +78,7 @@ public class PoolReader { private final ByteBuffer buf; private final Names names; private final Symtab syms; + private final Convert.Validation utf8validation; private ImmutablePoolHelper pool; @@ -90,6 +95,7 @@ public class PoolReader { this.buf = buf; this.names = names; this.syms = syms; + this.utf8validation = reader != null ? reader.utf8validation : Convert.Validation.NONE; } private static final BitSet classCP = new BitSet(); @@ -118,29 +124,29 @@ ClassSymbol getClass(int index) { /** * Get class name without resolving */ - Z peekClassName(int index, NameMapper mapper) { + Z peekClassName(int index, Utf8Mapper mapper) { return peekItemName(index, mapper); } /** * Get package name without resolving */ - Z peekPackageName(int index, NameMapper mapper) { + Z peekPackageName(int index, Utf8Mapper mapper) { return peekItemName(index, mapper); } /** * Get module name without resolving */ - Z peekModuleName(int index, NameMapper mapper) { + Z peekModuleName(int index, Utf8Mapper mapper) { return peekItemName(index, mapper); } - private Z peekItemName(int index, NameMapper mapper) { + private Z peekItemName(int index, Utf8Mapper mapper) { try { index = buf.getChar(pool.offset(index)); } catch (UnderflowException e) { - throw reader.badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw reader.badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } return peekName(index, mapper); } @@ -162,11 +168,13 @@ PackageSymbol getPackage(int index) { /** * Peek a name from the pool at given index without resolving. 
*/ - Z peekName(int index, Name.NameMapper mapper) { + Z peekName(int index, Utf8Mapper mapper) { try { return getUtf8(index, mapper); + } catch (InvalidUtfException e) { + throw reader.badClassFile(Fragments.BadUtf8ByteSequenceAt(e.getOffset())); } catch (UnderflowException e) { - throw reader.badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw reader.badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } } @@ -202,12 +210,14 @@ boolean hasTag(int index, int tag) { return pool.tag(index) == tag; } - private Z getUtf8(int index, NameMapper mapper) throws UnderflowException { + private Z getUtf8(int index, Utf8Mapper mapper) throws InvalidUtfException, UnderflowException { int tag = pool.tag(index); int offset = pool.offset(index); if (tag == CONSTANT_Utf8) { - int len = pool.poolbuf.getChar(offset); - return mapper.map(pool.poolbuf.elems, offset + 2, len); + int utf8len = pool.poolbuf.getChar(offset); + int utf8off = offset + 2; + pool.poolbuf.verifyRange(utf8off, utf8len); + return mapper.map(pool.poolbuf.elems, utf8off, utf8len); } else { throw reader.badClassFile("unexpected.const.pool.tag.at", Integer.toString(tag), @@ -215,15 +225,26 @@ private Z getUtf8(int index, NameMapper mapper) throws UnderflowException } } - private Object resolve(ByteBuffer poolbuf, int tag, int offset) throws UnderflowException { + private Object resolve(ByteBuffer poolbuf, int tag, int offset) throws InvalidUtfException, UnderflowException { switch (tag) { case CONSTANT_Utf8: { int len = poolbuf.getChar(offset); - return names.fromUtf(poolbuf.elems, offset + 2, len); + try { + return names.fromUtf(poolbuf.elems, offset + 2, len, utf8validation); + } catch (InvalidUtfException e) { + if (reader == null || reader.warnOnIllegalUtf8) { + if (reader != null) { + reader.log.warning(Warnings.InvalidUtf8InClassfile( + reader.currentClassFile, Fragments.BadUtf8ByteSequenceAt(e.getOffset()))); + } + return names.fromUtfLax(poolbuf.elems, offset 
+ 2, len); + } + throw e; + } } case CONSTANT_Class: { int index = poolbuf.getChar(offset); - Name name = names.fromUtf(getName(index).map(ClassFile::internalize)); + Name name = internalize(getName(index)); return syms.enterClass(reader.currentModule, name); } case CONSTANT_NameandType: { @@ -243,7 +264,7 @@ private Object resolve(ByteBuffer poolbuf, int tag, int offset) throws Underflow return getName(poolbuf.getChar(offset)).toString(); case CONSTANT_Package: { Name name = getName(poolbuf.getChar(offset)); - return syms.enterPackage(reader.currentModule, names.fromUtf(internalize(name))); + return syms.enterPackage(reader.currentModule, internalize(name)); } case CONSTANT_Module: { Name name = getName(poolbuf.getChar(offset)); @@ -261,13 +282,13 @@ private Object resolve(ByteBuffer poolbuf, int tag, int offset) throws Underflow * reasons, it would be unwise to eagerly turn all pool entries into corresponding javac * entities. First, not all entries are actually going to be read/used by javac; secondly, * there are cases where creating a symbol too early might result in issues (hence methods like - * {@link PoolReader#peekClassName(int, NameMapper)}. + * {@link PoolReader#peekClassName(int, Utf8Mapper)}. */ int readPool(ByteBuffer poolbuf, int offset) { try { return readPoolInternal(poolbuf, offset); } catch (UnderflowException e) { - throw reader.badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw reader.badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } } @@ -367,9 +388,11 @@

P readIfNeeded(int index, BitSet expectedTags) { } P p; try { - p = (P)resolve(poolbuf, tag(index), offset(index)); + p = (P)resolve(poolbuf, currentTag, offset(index)); + } catch (InvalidUtfException e) { + throw reader.badClassFile(Fragments.BadUtf8ByteSequenceAt(e.getOffset())); } catch (UnderflowException e) { - throw reader.badClassFile("bad.class.truncated.at.offset", Integer.toString(e.getLength())); + throw reader.badClassFile(Fragments.BadClassTruncatedAtOffset(e.getLength())); } values[index] = p; return p; @@ -380,4 +403,10 @@ int tag(int index) { return poolbuf.elems[offset(index) - 1]; } } + +// Utf8Mapper + + public interface Utf8Mapper { + X map(byte[] bytes, int offset, int len) throws InvalidUtfException; + } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/PoolWriter.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/PoolWriter.java index 9d510e8fcd0f5..af6f4b67faeb5 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/PoolWriter.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/PoolWriter.java @@ -42,6 +42,7 @@ import com.sun.tools.javac.jvm.PoolConstant.Dynamic.BsmKey; import com.sun.tools.javac.jvm.PoolConstant.NameAndType; import com.sun.tools.javac.util.ByteBuffer; +import com.sun.tools.javac.util.InvalidUtfException; import com.sun.tools.javac.util.List; import com.sun.tools.javac.util.Name; import com.sun.tools.javac.util.Names; @@ -324,7 +325,11 @@ protected void reset() { } protected Name toName() { - return sigbuf.toName(names); + try { + return sigbuf.toName(names); + } catch (InvalidUtfException e) { + throw new AssertionError(e); + } } } @@ -365,7 +370,7 @@ void writeConstant(PoolConstant c) { Type ct = (Type)c; Name name = ct.hasTag(ARRAY) ? 
typeSig(ct) : - names.fromUtf(externalize(ct.tsym.flatName())); + externalize(ct.tsym.flatName()); poolbuf.appendByte(tag); poolbuf.appendChar(putName(name)); if (ct.hasTag(CLASS)) { @@ -396,7 +401,7 @@ void writeConstant(PoolConstant c) { } case ClassFile.CONSTANT_Package: { PackageSymbol pkg = (PackageSymbol)c; - Name pkgName = names.fromUtf(externalize(pkg.flatName())); + Name pkgName = externalize(pkg.flatName()); poolbuf.appendByte(tag); poolbuf.appendChar(putName(pkgName)); break; diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties b/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties index 3edf0f8aea750..c53c241334a43 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties @@ -2045,6 +2045,10 @@ compiler.warn.big.major.version=\ {0}: major version {1} is newer than {2}, the highest major version supported by this compiler.\n\ It is recommended that the compiler be upgraded. 
+# 0: file name, 1: fragment +compiler.warn.invalid.utf8.in.classfile=\ + {0}: classfile contains invalid UTF-8: {1} + # 0: kind name, 1: symbol compiler.warn.static.not.qualified.by.type=\ static {0} should be qualified by type name, {1}, instead of by an expression @@ -2455,9 +2459,14 @@ compiler.misc.bad.const.pool.tag=\ compiler.misc.bad.const.pool.tag.at=\ bad constant pool tag: {0} at {1} +# 0: number +compiler.misc.bad.utf8.byte.sequence.at=\ + bad UTF-8 byte sequence at {0} + compiler.misc.unexpected.const.pool.tag.at=\ unexpected constant pool tag: {0} at {1} +# 0: number compiler.misc.bad.class.truncated.at.offset=\ class file truncated at offset {0} diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/ByteBuffer.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/ByteBuffer.java index 597b4f0bdbf09..0f87667a8f07a 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/ByteBuffer.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/ByteBuffer.java @@ -272,9 +272,10 @@ public void reset() { } /** Convert contents to name. + * @throws InvalidUtfException if invalid Modified UTF-8 is encountered */ - public Name toName(Names names) { - return names.fromUtf(elems, 0, length); + public Name toName(Names names) throws InvalidUtfException { + return names.fromUtf(elems, 0, length, Convert.Validation.STRICT); } /** Verify there are at least the specified number of bytes in this buffer at the specified offset. 
diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Convert.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Convert.java index 57c3fe9b2344b..6e0fd5f76e4d3 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Convert.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Convert.java @@ -100,77 +100,120 @@ public static long string2long(String s, int radix) /* Conversion routines between names, strings, and byte arrays in Utf8 format */ - /** Convert `len' bytes from utf8 to characters. - * Parameters are as in System.arraycopy - * Return first index in `dst' past the last copied char. + /** Validate the given Modified UTF-8 encoding using the given validation level. + * Reject invalid data by throwing an {@link InvalidUtfException}. + * Note: there is no point in calling this method with {@link Validation#NONE}. + * @param buf Buffer containing data + * @param off Data starting offset + * @param len Data length + * @param validation Level of validation + * @throws InvalidUtfException if {@code validation} is not {@link Validation#NONE} + * and invalid Modified UTF-8 is encountered + */ + public static void utfValidate(byte[] buf, int off, int len, Validation validation) throws InvalidUtfException { + utf2chars(buf, off, null, 0, len, validation); + } + + /** Decode characters encoded in Modified UTF-8 encoding using the given validation level. + * Reject any invalid data by throwing an {@link InvalidUtfException}. + * Parameters are as in System.arraycopy(): * @param src The array holding the bytes to convert. - * @param sindex The start index from which bytes are converted. - * @param dst The array holding the converted characters.. - * @param dindex The start index from which converted characters + * @param soff The start index from which bytes are converted. 
+ * @param dst The array holding the converted characters, + * or null to just validate + * @param doff The start index from which converted characters * are written. * @param len The maximum number of bytes to convert. + * @param validation Level of validation + * @throws InvalidUtfException if invalid Modified UTF-8 is encountered + * @return the index in {@code dst} just after the last copied char + * @throws InvalidUtfException if {@code validation} is not {@link Validation#NONE} + * and invalid Modified UTF-8 is encountered */ - public static int utf2chars(byte[] src, int sindex, - char[] dst, int dindex, - int len) { - int i = sindex; - int j = dindex; - int limit = sindex + len; - while (i < limit) { - int b = src[i++] & 0xFF; - if (b >= 0xE0) { - b = (b & 0x0F) << 12; - b = b | (src[i++] & 0x3F) << 6; - b = b | (src[i++] & 0x3F); - } else if (b >= 0xC0) { - b = (b & 0x1F) << 6; - b = b | (src[i++] & 0x3F); - } - dst[j++] = (char)b; + public static int utf2chars(byte[] src, int soff, char[] dst, int doff, int len, Validation validation) + throws InvalidUtfException { + final int doff0 = doff; + while (len-- > 0) { + final int soff0 = soff; + int value = src[soff++]; + if (value < 0) { + if ((value & 0xe0) == 0xc0) { + int value2; + if (len-- > 0) + value2 = src[soff++]; + else if (validation.allowAnything()) + value2 = 0; + else + throw new InvalidUtfException(soff0); + if (!validation.allowAnything() && (value2 & 0xc0) != 0x80) + throw new InvalidUtfException(soff0); + value = ((value & 0x1f) << 6) | (value2 & 0x3f); + if (!validation.allowLongEncoding() && (value & ~0x7f) == 0 && value != 0) + throw new InvalidUtfException(soff0); // could have been one byte + } else if ((value & 0xf0) == 0xe0) { + int value2; + int value3; + if ((len -= 2) >= 0) { + value2 = src[soff++]; + value3 = src[soff++]; + } else if (validation.allowAnything()) { + value2 = 0; + value3 = 0; + } else + throw new InvalidUtfException(soff0); + if (!validation.allowAnything() && 
((value2 & 0xc0) != 0x80 || (value3 & 0xc0) != 0x80)) + throw new InvalidUtfException(soff0); + value = ((value & 0x0f) << 12) | ((value2 & 0x3f) << 6) | (value3 & 0x3f); + if (!validation.allowLongEncoding() && (value & ~0x7ff) == 0) + throw new InvalidUtfException(soff0); // could have been two bytes + } else if (validation.allowAnything()) + value &= 0xff; + else + throw new InvalidUtfException(soff0); + } else if (!validation.allowSingleByteNul() && value == 0) + throw new InvalidUtfException(soff0); // 0x0000 must be encoded as two bytes + if (dst != null) + dst[doff] = (char)value; + doff++; } - return j; + return doff - doff0; } - /** Return bytes in Utf8 representation as an array of characters. + /** Decode characters encoded in Modified UTF-8 encoding. * @param src The array holding the bytes. * @param sindex The start index from which bytes are converted. * @param len The maximum number of bytes to convert. + * @param validation Level of validation + * @return The decoded characters in an array. + * @throws InvalidUtfException if {@code validation} is not {@link Validation#NONE} + * and invalid Modified UTF-8 is encountered */ - public static char[] utf2chars(byte[] src, int sindex, int len) { + public static char[] utf2chars(byte[] src, int sindex, int len, Validation validation) + throws InvalidUtfException { char[] dst = new char[len]; - int len1 = utf2chars(src, sindex, dst, 0, len); + int len1 = utf2chars(src, sindex, dst, 0, len, validation); + if (len1 == len) + return dst; char[] result = new char[len1]; System.arraycopy(dst, 0, result, 0, len1); return result; } - /** Return all bytes of a given array in Utf8 representation - * as an array of characters. - * @param src The array holding the bytes. - */ - public static char[] utf2chars(byte[] src) { - return utf2chars(src, 0, src.length); - } - - /** Return bytes in Utf8 representation as a string. + /** Decode a {@link String} encoded in Modified UTF-8 encoding. 
* @param src The array holding the bytes. * @param sindex The start index from which bytes are converted. * @param len The maximum number of bytes to convert. + * @param validation Level of validation + * @throws InvalidUtfException if {@code validation} is not {@link Validation#NONE} + * and invalid Modified UTF-8 is encountered */ - public static String utf2string(byte[] src, int sindex, int len) { + public static String utf2string(byte[] src, int sindex, int len, Validation validation) + throws InvalidUtfException { char dst[] = new char[len]; - int len1 = utf2chars(src, sindex, dst, 0, len); + int len1 = utf2chars(src, sindex, dst, 0, len, validation); return new String(dst, 0, len1); } - /** Return all bytes of a given array in Utf8 representation - * as a string. - * @param src The array holding the bytes. - */ - public static String utf2string(byte[] src) { - return utf2string(src, 0, src.length); - } - /** Copy characters in source array to bytes in target array, * converting them to Utf8 representation. * The target array must be large enough to hold the result. @@ -358,4 +401,60 @@ public static List classCandidates(Name name) { } return names.reverse(); } + + /** + * Modified UTF-8 decoding validation levels. + */ + public enum Validation { + + /** + * Do zero validation of UTF-8, i.e., always decode something without error. + * When this is used, {@link InvalidUtfException} is never thrown. + */ + NONE(true, true, true), + + /** + * Do validation in accordance with the pre-JDK 1.4 Java class file format, + * which allows (a) the NUL character {@code \u0000} to be encoded as a single byte + * and (b) longer-than-necessary encodings (e.g., three bytes instead of two). + */ + PREJDK14(true, true, false), + + /** + * Do strict validation. At this level, each character has only one valid encoding. 
+ */ + STRICT(false, false, false); + + private final boolean allowSingleByteNul; + private final boolean allowLongEncoding; + private final boolean allowAnything; + + private Validation(boolean allowSingleByteNul, boolean allowLongEncoding, boolean allowAnything) { + this.allowSingleByteNul = allowSingleByteNul; + this.allowLongEncoding = allowLongEncoding; + this.allowAnything = allowAnything; + } + + /** + * Whether to allow the NUL character {@code \u0000} to be encoded as a single byte. + * Modified UTF-8 specifies that it be encoded in two bytes. + */ + public boolean allowSingleByteNul() { + return allowSingleByteNul; + } + + /** + * Whether to allow characters to be encoded using more bytes than required. + */ + public boolean allowLongEncoding() { + return allowLongEncoding; + } + + /** + * Whether to allow anything, including truncated characters and bogus flag bits. + */ + public boolean allowAnything() { + return allowAnything; + } + } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/InvalidUtfException.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/InvalidUtfException.java new file mode 100644 index 0000000000000..98ebb19b63d88 --- /dev/null +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/InvalidUtfException.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +package com.sun.tools.javac.util; + +/** + * Exception thrown when invalid Modified UTF-8 is encountered. + * + *

This is NOT part of any supported API. + * If you write code that depends on this, you do so at your own risk. + * This code and its internal interfaces are subject to change or + * deletion without notice. + * + * @see Convert#utf2chars + * @see Convert#utfValidate + */ +public class InvalidUtfException extends Exception { + + private static final long serialVersionUID = 0; + + private final int offset; + + public InvalidUtfException(int offset) { + this.offset = offset; + } + + /** Get the {@code byte[]} array offset at which the invalid data was found. + */ + public int getOffset() { + return offset; + } +} diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Name.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Name.java index c67109ce6e497..26c6a4286cd38 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Name.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Name.java @@ -87,7 +87,11 @@ public Name append(Name n) { byte[] bs = new byte[len + n.getByteLength()]; getBytes(bs, 0); n.getBytes(bs, len); - return table.fromUtf(bs, 0, bs.length); + try { + return table.fromUtf(bs, 0, bs.length, Convert.Validation.NONE); + } catch (InvalidUtfException e) { + throw new AssertionError(e); + } } /** Return the concatenation of this name, the given ASCII @@ -99,7 +103,11 @@ public Name append(char c, Name n) { getBytes(bs, 0); bs[len] = (byte) c; n.getBytes(bs, len+1); - return table.fromUtf(bs, 0, bs.length); + try { + return table.fromUtf(bs, 0, bs.length, Convert.Validation.NONE); + } catch (InvalidUtfException e) { + throw new AssertionError(e); + } } /** Order names lexicographically. 
@@ -180,14 +188,22 @@ public boolean startsWith(Name prefix) { */ public Name subName(int start, int end) { if (end < start) end = start; - return table.fromUtf(getByteArray(), getByteOffset() + start, end - start); + try { + return table.fromUtf(getByteArray(), getByteOffset() + start, end - start, Convert.Validation.NONE); + } catch (InvalidUtfException e) { + throw new AssertionError(e); + } } /** Return the string representation of this name. */ @Override public String toString() { - return Convert.utf2string(getByteArray(), getByteOffset(), getByteLength()); + try { + return Convert.utf2string(getByteArray(), getByteOffset(), getByteLength(), Convert.Validation.NONE); + } catch (InvalidUtfException e) { + throw new AssertionError(e); + } } /** Return the Utf8 representation of this name. @@ -257,16 +273,18 @@ public Name fromString(String s) { } /** Get the name for the bytes in array cs. - * Assume that bytes are in utf8 format. + * Assume that bytes are in strictly valid "Modified UTF-8" format. */ - public Name fromUtf(byte[] cs) { - return fromUtf(cs, 0, cs.length); + public Name fromUtf(byte[] cs) throws InvalidUtfException { + return fromUtf(cs, 0, cs.length, Convert.Validation.STRICT); } /** get the name for the bytes in cs[start..start+len-1]. * Assume that bytes are in utf8 format. + * @throws InvalidUtfException if invalid Modified UTF-8 is encountered */ - public abstract Name fromUtf(byte[] cs, int start, int len); + public abstract Name fromUtf(byte[] cs, int start, int len, Convert.Validation validation) + throws InvalidUtfException; /** Release any resources used by this table. 
*/ diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Names.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Names.java index e4efe500e7163..303cd3c2eb914 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Names.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Names.java @@ -421,11 +421,19 @@ public Name fromString(String s) { return table.fromString(s); } - public Name fromUtf(byte[] cs) { + public Name fromUtf(byte[] cs) throws InvalidUtfException { return table.fromUtf(cs); } - public Name fromUtf(byte[] cs, int start, int len) { - return table.fromUtf(cs, start, len); + public Name fromUtf(byte[] cs, int start, int len, Convert.Validation validation) throws InvalidUtfException { + return table.fromUtf(cs, start, len, validation); + } + + public Name fromUtfLax(byte[] cs, int start, int len) { + try { + return table.fromUtf(cs, start, len, Convert.Validation.NONE); + } catch (InvalidUtfException e) { + throw new AssertionError(e); + } } } diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/SharedNameTable.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/SharedNameTable.java index d092b3062ea7c..4c147be19d780 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/SharedNameTable.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/SharedNameTable.java @@ -119,7 +119,9 @@ public Name fromChars(char[] cs, int start, int len) { } @Override - public Name fromUtf(byte[] cs, int start, int len) { + public Name fromUtf(byte[] cs, int start, int len, Convert.Validation validation) throws InvalidUtfException { + if (validation != Convert.Validation.NONE) + Convert.utfValidate(cs, start, len, validation); int h = hashValue(cs, start, len) & hashMask; NameImpl n = hashes[h]; byte[] names = this.bytes; diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/UnsharedNameTable.java 
b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/UnsharedNameTable.java index ced7c6010c272..7602c071c76c1 100644 --- a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/UnsharedNameTable.java +++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/UnsharedNameTable.java @@ -81,11 +81,17 @@ public UnsharedNameTable(Names names) { public Name fromChars(char[] cs, int start, int len) { byte[] name = new byte[len * 3]; int nbytes = Convert.chars2utf(cs, start, name, 0, len); - return fromUtf(name, 0, nbytes); + return fromValidUtf(name, 0, nbytes); } @Override - public Name fromUtf(byte[] cs, int start, int len) { + public Name fromUtf(byte[] cs, int start, int len, Convert.Validation validation) throws InvalidUtfException { + if (validation != Convert.Validation.NONE) + Convert.utfValidate(cs, start, len, validation); + return fromValidUtf(cs, start, len); + } + + private Name fromValidUtf(byte[] cs, int start, int len) { int h = hashValue(cs, start, len) & hashMask; HashEntry element = hashes[h]; diff --git a/test/langtools/tools/javac/classreader/InvalidModifiedUtf8Test.java b/test/langtools/tools/javac/classreader/InvalidModifiedUtf8Test.java new file mode 100644 index 0000000000000..1bd87af890acf --- /dev/null +++ b/test/langtools/tools/javac/classreader/InvalidModifiedUtf8Test.java @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8303623 + * @modules jdk.compiler/com.sun.tools.javac.code + * @summary Compiler should disallow non-standard UTF-8 string encodings + */ + +import com.sun.tools.javac.code.Source; +import com.sun.tools.javac.Main; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.nio.file.Files; +import java.util.Arrays; + +public class InvalidModifiedUtf8Test { + + // + // What this test does (repeatedly): + // 1. Compile a Java source file for ClassX normally + // 2. Modify the UTF-8 inside the ClassX classfile so that it is + // still valid structurally but uses a non-standard way of + // encoding some character (according to "Modified UTF-8"). + // 3. Compile a Java source file for RefClass that references ClassX + // 4. 
Verify that the compiler gives a "bad UTF-8" error + // + + // We change c3 a8 -> c3 e8 (illegal second byte not of the form 0b10xxxxxx) + private static final String SOURCE_0 = """ + interface CLASSNAME { + void ABC\u00e8(); // encodes to: 41 42 43 c3 a8 + } + """; + + // We change e1 80 80 -> e1 80 40 (illegal third byte not of the form 0b10xxxxxx) + private static final String SOURCE_1 = """ + interface CLASSNAME { + void ABC\u1000(); // encodes to: 41 42 43 e1 80 80 + } + """; + + // We change c4 80 -> c1 81 (illegal two-byte encoding for one-byte value) + private static final String SOURCE_2 = """ + interface CLASSNAME { + void ABC\u0100(); // encodes to: 41 42 43 c4 80 + } + """; + + // We change e1 80 80 -> e0 84 80 (illegal three-byte encoding for two-byte value) + private static final String SOURCE_3 = """ + interface CLASSNAME { + void ABC\u1000(); // encodes to: 41 42 43 e1 80 80 + } + """; + + // We change 44 -> 00 (illegal one-byte encoding of 0x0000) + private static final String SOURCE_4 = """ + interface CLASSNAME { + void ABCD(); // encodes to: 41 42 43 44 + } + """; + + // We change 43 44 -> e1 80 (illegal truncated three-byte encoding) + private static final String SOURCE_5 = """ + interface CLASSNAME { + void ABCD(); // encodes to: 41 42 43 44 + } + """; + + // This is the source file that references one of the above + private static final String REF_SOURCE = """ + interface RefClass extends CLASSNAME { + } + """; + + private static TestCase[] TEST_CASES = new TestCase[] { + new TestCase(0, SOURCE_0, "414243c3a8", "414243c3e8"), + new TestCase(1, SOURCE_1, "414243e18080", "414243e18040"), + new TestCase(2, SOURCE_2, "414243c480", "414243c181"), + new TestCase(3, SOURCE_3, "414243e18080", "414243e08480"), + new TestCase(4, SOURCE_4, "41424344", "41424300"), + new TestCase(5, SOURCE_5, "41424344", "4142e180"), + }; + + public static String bytes2string(byte[] array) { + char[] buf = new char[array.length * 2]; + for (int i = 0; i < array.length;
i++) { + int value = array[i] & 0xff; + buf[i * 2] = Character.forDigit(value >> 4, 16); + buf[i * 2 + 1] = Character.forDigit(value & 0xf, 16); + } + return new String(buf); + } + + public static byte[] string2bytes(String string) { + byte[] buf = new byte[string.length() / 2]; + for (int i = 0; i < string.length(); i += 2) { + int value = Integer.parseInt(string.substring(i, i + 2), 16); + buf[i / 2] = (byte)value; + } + return buf; + } + + private static void createSourceFile(String content, File file) throws IOException { + System.err.println("creating: " + file); + try (PrintStream output = new PrintStream(new FileOutputStream(file))) { + output.println(content); + } + } + + private static void writeFile(File file, byte[] content) throws IOException { + System.err.println("writing: " + file); + try (FileOutputStream output = new FileOutputStream(file)) { + Files.write(file.toPath(), content); + } + } + + private static void compileRefClass(File file, boolean expectSuccess, String expectedError) { + final StringWriter diags = new StringWriter(); + final String[] params = new String[] { + "-classpath", + ".", + "-XDrawDiagnostics", + file.toString() + }; + System.err.println("compiling: " + file); + int ret = Main.compile(params, new PrintWriter(diags, true)); + System.err.println("exit value: " + ret); + String output = diags.toString().trim(); + if (!output.isEmpty()) + System.err.println("output:\n" + output); + else + System.err.println("no output"); + if (!expectSuccess && ret == 0) + throw new AssertionError("compilation succeeded, but expected failure"); + else if (expectSuccess && ret != 0) + throw new AssertionError("compilation failed, but expected success"); + if (expectedError != null && !diags.toString().contains(expectedError)) + throw new AssertionError("expected output \"" + expectedError + "\" not found"); + } + + public static void main(String... 
args) throws Exception { + + // Create source files + for (TestCase test : TEST_CASES) + test.createSourceFile(); + + // Compile source files + for (TestCase test : TEST_CASES) { + int ret = Main.compile(new String[] { test.sourceFile().toString() }); + if (ret != 0) + throw new AssertionError("compilation of " + test.sourceFile() + " failed"); + } + + // We should get warnings in JDK 21 and errors in any later release + final boolean expectSuccess = Source.DEFAULT.compareTo(Source.JDK21) <= 0; + + // Now compile REF_SOURCE against each classfile without and then with the modification. + // When compiling without the modification, everything should be normal. + // When compiling with the modification, an error should be generated. + for (TestCase test : TEST_CASES) { + System.err.println("==== TEST " + test.index() + " ===="); + + // Create reference source file + final File refSource = new File("RefClass.java"); + createSourceFile(REF_SOURCE.replaceAll("CLASSNAME", test.className()), refSource); + + // Do a normal compilation + compileRefClass(refSource, true, null); + + // Now corrupt the class file + System.err.println("modifying: " + test.classFile()); + final File classFile = test.classFile(); + final byte[] data1 = Files.readAllBytes(classFile.toPath()); + final byte[] data2 = test.modify(data1); + writeFile(classFile, data2); + + // Do a corrupt compilation + compileRefClass(refSource, expectSuccess, "compiler.misc.bad.utf8.byte.sequence.at"); + } + } + +// TestCase + + static class TestCase { + + final int index; + final String source; + final String match; + final String replace; + + TestCase(int index, String source, String match, String replace) { + this.index = index; + this.source = source.replaceAll("CLASSNAME", className()); + this.match = match; + this.replace = replace; + } + + byte[] modify(byte[] input) { + final byte[] output = string2bytes(bytes2string(input).replaceAll(match, replace)); + if (Arrays.equals(output, input)) + throw new 
AssertionError("modification of " + classFile() + " failed"); + return output; + } + + int index() { + return index; + } + + String className() { + return "Class" + index; + } + + File sourceFile() { + return new File(className() + ".java"); + } + + File classFile() { + return new File(className() + ".class"); + } + + void createSourceFile() throws IOException { + InvalidModifiedUtf8Test.createSourceFile(source, sourceFile()); + } + } +} diff --git a/test/langtools/tools/javac/diags/examples.not-yet.txt b/test/langtools/tools/javac/diags/examples.not-yet.txt index bb1ab466c55ca..6e78ca644bf0f 100644 --- a/test/langtools/tools/javac/diags/examples.not-yet.txt +++ b/test/langtools/tools/javac/diags/examples.not-yet.txt @@ -57,6 +57,7 @@ compiler.misc.bad.enclosing.method # bad class file compiler.misc.bad.runtime.invisible.param.annotations # bad class file compiler.misc.bad.signature # bad class file compiler.misc.bad.requires.flag # bad class file +compiler.misc.bad.utf8.byte.sequence.at # bad class file compiler.misc.bad.type.annotation.value compiler.misc.class.file.not.found # ClassReader compiler.misc.class.file.wrong.class @@ -116,6 +117,7 @@ compiler.warn.future.attr # ClassReader compiler.warn.illegal.char.for.encoding compiler.warn.incubating.modules # requires adjusted classfile compiler.warn.invalid.archive.file +compiler.warn.invalid.utf8.in.classfile # bad class file compiler.warn.is.preview # difficult to produce reliably despite future changes to java.base compiler.warn.is.preview.reflective # difficult to produce reliably despite future changes to java.base compiler.warn.output.file.clash # this warning is not generated on Linux