Skip to content

Commit

Permalink
Throw IOException when the line is over the limit
Browse files Browse the repository at this point in the history
Fail the read when the text file line is over the maxLineLength limit.
  • Loading branch information
Ying Su committed Sep 11, 2018
1 parent 5fe57e5 commit 95bc0d1
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 2 deletions.
19 changes: 17 additions & 2 deletions src/main/java/org/apache/hadoop/util/LineReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume)
int appendLength = readLength - newlineLength;
if (appendLength > maxLineLength - txtLength) {
appendLength = maxLineLength - txtLength;
if (appendLength > 0) {
// We want to fail the read when the line length is over the limit.
throw new IOException("Too many bytes before newline: " + maxLineLength);
}
}
if (appendLength > 0) {
int newTxtLength = txtLength + appendLength;
Expand All @@ -257,7 +261,10 @@ private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume)
}
} while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

if (bytesConsumed > Integer.MAX_VALUE) {
if (newlineLength == 0 && bytesConsumed >= maxBytesToConsume) {
// It is possible that bytesConsumed is over the maxBytesToConsume but we
// didn't append anything to str.buffer. If we have consumed over maxBytesToConsume
// bytes but still haven't seen a line terminator, we will fail the read.
throw new IOException("Too many bytes before newline: " + bytesConsumed);
}
return (int)bytesConsumed;
Expand Down Expand Up @@ -342,6 +349,10 @@ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume)
int appendLength = readLength - delPosn;
if (appendLength > maxLineLength - txtLength) {
appendLength = maxLineLength - txtLength;
if (appendLength > 0) {
// We want to fail the read when the line length is over the limit.
throw new IOException("Too many bytes before delimiter: " + maxLineLength);
}
}
bytesConsumed += ambiguousByteCount;
if (appendLength >= 0 && ambiguousByteCount > 0) {
Expand Down Expand Up @@ -371,7 +382,11 @@ private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume)
}
} while (delPosn < recordDelimiterBytes.length
&& bytesConsumed < maxBytesToConsume);
if (bytesConsumed > Integer.MAX_VALUE) {
if (delPosn < recordDelimiterBytes.length
&& bytesConsumed >= maxBytesToConsume) {
// It is possible that bytesConsumed is over the maxBytesToConsume but we
// didn't append anything to str.buffer. If we have consumed over maxBytesToConsume
// bytes but still haven't seen a line terminator, we will fail the read.
throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
}
return (int) bytesConsumed;
Expand Down
83 changes: 83 additions & 0 deletions src/test/java/com/facebook/presto/hadoop/TestLineReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package com.facebook.presto.hadoop;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.LineReader;
import org.testng.annotations.Test;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.testng.Assert.assertEquals;

/**
 * Tests that {@link LineReader#readLine(Text, int, int)} fails with an
 * {@link IOException} when a record exceeds the maxLineLength or
 * maxBytesToConsume limit, for both the default newline delimiter and a
 * custom record delimiter.
 *
 * <p>Every test asserts on the captured exception message <em>outside</em> the
 * {@code try} block, so a read that unexpectedly succeeds fails the test
 * instead of passing silently.
 */
public class TestLineReader
{
    // Shared input: the only '!' delimiters are at offsets 11 and 26, and the
    // single newline is the last byte, so every limit used below is exceeded
    // before a record terminator is seen.
    private static final String INPUT = "Hello world! Goodbye world!\n";

    // Internal read buffer size of the LineReader; small on purpose so that a
    // single readLine call needs several buffer refills.
    private static final int BUFFER_SIZE = 4;

    @Test
    public void testDefaultReaderMaxBytesConsumed()
    {
        // maxLineLength = 0, maxBytesToConsume = 10.
        // It should be 3 reads of 4 bytes each, so the final bytesConsumed is 12
        assertReadFails(newDefaultReader(), 0, 10, "Too many bytes before newline: " + 12);
    }

    @Test
    public void testDefaultReaderMaxLineLength()
    {
        // The line is longer than maxLineLength = 10; the message reports the limit.
        assertReadFails(newDefaultReader(), 10, 100, "Too many bytes before newline: " + 10);
    }

    @Test
    public void testCustomReaderMaxBytesConsumed()
    {
        // maxLineLength = 0, maxBytesToConsume = 5.
        // It should be 2 reads of 4 bytes each, so the final bytesConsumed is 8
        assertReadFails(newCustomReader(), 0, 5, "Too many bytes before delimiter: " + 8);
    }

    @Test
    public void testCustomReaderMaxLineLength()
    {
        // "Hello world" (11 bytes before '!') is longer than maxLineLength = 10.
        assertReadFails(newCustomReader(), 10, 100, "Too many bytes before delimiter: " + 10);
    }

    /** Creates a reader over {@link #INPUT} that splits records on the default line terminators. */
    private static LineReader newDefaultReader()
    {
        InputStream in = new ByteArrayInputStream(INPUT.getBytes(UTF_8));
        return new LineReader(in, BUFFER_SIZE);
    }

    /** Creates a reader over {@link #INPUT} that splits records on the custom delimiter "!". */
    private static LineReader newCustomReader()
    {
        InputStream in = new ByteArrayInputStream(INPUT.getBytes(UTF_8));
        byte[] delimiter = "!".getBytes(UTF_8);
        return new LineReader(in, BUFFER_SIZE, delimiter);
    }

    /**
     * Asserts that a single {@code readLine} call with the given limits throws an
     * {@link IOException} carrying exactly {@code expectedMessage}.
     *
     * <p>If no exception is thrown the captured message is {@code null}, which
     * makes the assertion fail.
     */
    private static void assertReadFails(
            LineReader reader,
            int maxLineLength,
            int maxBytesToConsume,
            String expectedMessage)
    {
        String actualMessage = readLineFailureMessage(reader, maxLineLength, maxBytesToConsume);
        assertEquals(actualMessage, expectedMessage);
    }

    /**
     * Performs one {@code readLine} call and returns the message of the
     * {@link IOException} it throws, or {@code null} if the read completed
     * without throwing.
     */
    private static String readLineFailureMessage(LineReader reader, int maxLineLength, int maxBytesToConsume)
    {
        try {
            reader.readLine(new Text(), maxLineLength, maxBytesToConsume);
        }
        catch (IOException e) {
            return e.getMessage();
        }
        return null;
    }
}

0 comments on commit 95bc0d1

Please sign in to comment.