diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java
index a1cf7099ada32..c516a30bf797a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java
@@ -43,6 +43,10 @@
 @InterfaceAudience.LimitedPrivate({"MapReduce"})
 @InterfaceStability.Unstable
 public class LineReader implements Closeable {
+  // Limitation for array size is VM specific. Current HotSpot VM limitation
+  // for array size is Integer.MAX_VALUE - 5 (2^31 - 1 - 5).
+  // Integer.MAX_VALUE - 8 should be safe enough.
+  private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
   private static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
   private int bufferSize = DEFAULT_BUFFER_SIZE;
   private InputStream in;
@@ -242,8 +246,14 @@ private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume)
         appendLength = maxLineLength - txtLength;
       }
       if (appendLength > 0) {
+        int newTxtLength = txtLength + appendLength;
+        if (str.getBytes().length < newTxtLength && Math.max(newTxtLength, txtLength << 1) > MAX_ARRAY_SIZE) {
+          // If str needs to be resized but the target capacity is over the VM limit, it will trigger OOM.
+          // In such case we will throw an IOException so the caller can deal with it.
+          throw new IOException("Too many bytes before newline: " + newTxtLength);
+        }
         str.append(buffer, startPosn, appendLength);
-        txtLength += appendLength;
+        txtLength = newTxtLength;
       }
     } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);
 
@@ -344,8 +354,14 @@ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume)
         unsetNeedAdditionalRecordAfterSplit();
       }
       if (appendLength > 0) {
+        int newTxtLength = txtLength + appendLength;
+        if (str.getBytes().length < newTxtLength && Math.max(newTxtLength, txtLength << 1) > MAX_ARRAY_SIZE) {
+          // If str needs to be resized but the target capacity is over the VM limit, it will trigger OOM.
+          // In such case we will throw an IOException so the caller can deal with it.
+          throw new IOException("Too many bytes before newline: " + newTxtLength);
+        }
         str.append(buffer, startPosn, appendLength);
-        txtLength += appendLength;
+        txtLength = newTxtLength;
       }
       if (bufferPosn >= bufferLength) {
         if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {