diff --git a/src/main/java/org/apache/hadoop/util/LineReader.java b/src/main/java/org/apache/hadoop/util/LineReader.java index 97410c4..d3259f3 100644 --- a/src/main/java/org/apache/hadoop/util/LineReader.java +++ b/src/main/java/org/apache/hadoop/util/LineReader.java @@ -41,6 +41,10 @@ @InterfaceAudience.LimitedPrivate({"MapReduce"}) @InterfaceStability.Unstable public class LineReader implements Closeable { + // The maximum array size is VM specific. The current HotSpot VM limit + // for array size is Integer.MAX_VALUE - 5 (2^31 - 1 - 5). + // Integer.MAX_VALUE - 8 should be safe enough. NOTE(review): the constant below subtracts 9 rather than 8; confirm which value is intended. + private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 9; private static final int DEFAULT_BUFFER_SIZE = 64 * 1024; private int bufferSize = DEFAULT_BUFFER_SIZE; private InputStream in; @@ -168,6 +172,8 @@ public void close() throws IOException { */ public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { + maxLineLength = Math.min(maxLineLength, MAX_ARRAY_SIZE); + maxBytesToConsume = Math.min(maxBytesToConsume, MAX_ARRAY_SIZE); if (this.recordDelimiterBytes != null) { return readCustomLine(str, maxLineLength, maxBytesToConsume); } else { @@ -240,12 +246,19 @@ private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) appendLength = maxLineLength - txtLength; } if (appendLength > 0) { + int newTxtLength = txtLength + appendLength; + if (str.getBytes().length < newTxtLength && Math.max(newTxtLength, txtLength << 1) > MAX_ARRAY_SIZE) { + // If str needs to be resized but the target capacity is over the VM limit, the allocation would trigger an OOM. NOTE(review): txtLength << 1 is an int shift that wraps negative once txtLength reaches 2^30; confirm this matches how str grows its buffer. + // In that case we throw an IOException instead so the caller can deal with it. 
+ throw new IOException("Too many bytes before newline: " + newTxtLength); + } str.append(buffer, startPosn, appendLength); - txtLength += appendLength; + txtLength = newTxtLength; } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); if (bytesConsumed > Integer.MAX_VALUE) { + throw new IOException("Too many bytes before newline: " + bytesConsumed); } return (int)bytesConsumed; @@ -342,8 +355,14 @@ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) unsetNeedAdditionalRecordAfterSplit(); } if (appendLength > 0) { + int newTxtLength = txtLength + appendLength; + if (str.getBytes().length < newTxtLength && Math.max(newTxtLength, txtLength << 1) > MAX_ARRAY_SIZE) { + // If str needs to be resized but the target capacity is over the VM limit, the allocation would trigger an OOM. NOTE(review): txtLength << 1 is an int shift that wraps negative once txtLength reaches 2^30; confirm this matches how str grows its buffer. + // In that case we throw an IOException instead so the caller can deal with it. + throw new IOException("Too many bytes before newline: " + newTxtLength); + } str.append(buffer, startPosn, appendLength); - txtLength += appendLength; + txtLength = newTxtLength; } if (bufferPosn >= bufferLength) { if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {