Skip to content

Commit

Permalink
Prevent the LineReader from triggering OOM on array size
Browse files Browse the repository at this point in the history
  • Loading branch information
Ying Su committed Sep 7, 2018
1 parent 62a2ff3 commit f417683
Showing 1 changed file with 21 additions and 2 deletions.
23 changes: 21 additions & 2 deletions src/main/java/org/apache/hadoop/util/LineReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@
@InterfaceAudience.LimitedPrivate({"MapReduce"})
@InterfaceStability.Unstable
public class LineReader implements Closeable {
// Limitation for array size is VM specific. Current HotSpot VM limitation
// for array size is Integer.MAX_VALUE - 5 (2^31 - 1 - 5).
// Integer.MAX_VALUE - 9 should be safe enough.
private static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 9;
private static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
private int bufferSize = DEFAULT_BUFFER_SIZE;
private InputStream in;
Expand Down Expand Up @@ -168,6 +172,8 @@ public void close() throws IOException {
*/
public int readLine(Text str, int maxLineLength,
int maxBytesToConsume) throws IOException {
maxLineLength = Math.min(maxLineLength, MAX_ARRAY_SIZE);
maxBytesToConsume = Math.min(maxBytesToConsume, MAX_ARRAY_SIZE);
if (this.recordDelimiterBytes != null) {
return readCustomLine(str, maxLineLength, maxBytesToConsume);
} else {
Expand Down Expand Up @@ -240,12 +246,19 @@ private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume)
appendLength = maxLineLength - txtLength;
}
if (appendLength > 0) {
int newTxtLength = txtLength + appendLength;
if (str.getBytes().length < newTxtLength && Math.max(newTxtLength, txtLength << 1) > MAX_ARRAY_SIZE) {
// If str needs to be resized but the target capacity is over the VM limit, the resize will trigger an OOM.
// In that case we throw an IOException so the caller can deal with it.
throw new IOException("Too many bytes before newline: " + newTxtLength);
}
str.append(buffer, startPosn, appendLength);
txtLength += appendLength;
txtLength = newTxtLength;
}
} while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

if (bytesConsumed > Integer.MAX_VALUE) {

throw new IOException("Too many bytes before newline: " + bytesConsumed);
}
return (int)bytesConsumed;
Expand Down Expand Up @@ -342,8 +355,14 @@ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume)
unsetNeedAdditionalRecordAfterSplit();
}
if (appendLength > 0) {
int newTxtLength = txtLength + appendLength;
if (str.getBytes().length < newTxtLength && Math.max(newTxtLength, txtLength << 1) > MAX_ARRAY_SIZE) {
// If str needs to be resized but the target capacity is over the VM limit, the resize will trigger an OOM.
// In that case we throw an IOException so the caller can deal with it.
throw new IOException("Too many bytes before newline: " + newTxtLength);
}
str.append(buffer, startPosn, appendLength);
txtLength += appendLength;
txtLength = newTxtLength;
}
if (bufferPosn >= bufferLength) {
if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {
Expand Down

0 comments on commit f417683

Please sign in to comment.